Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b134b7d6
Commit
b134b7d6
authored
May 16, 2022
by
carlushuang
Browse files
Merge remote-tracking branch 'origin/develop' into cpu_avx2
parents
090ba885
9f71ff48
Changes
211
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
349 additions
and
174 deletions
+349
-174
library/include/ck/library/host_tensor/host_reduction.hpp
library/include/ck/library/host_tensor/host_reduction.hpp
+5
-2
library/include/ck/library/host_tensor/host_tensor.hpp
library/include/ck/library/host_tensor/host_tensor.hpp
+2
-2
library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp
...reference_tensor_operation/cpu/reference_batched_gemm.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
...e_tensor_operation/cpu/reference_conv_backward_weight.hpp
+18
-10
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
...eference_tensor_operation/cpu/reference_conv_bwd_data.hpp
+59
-31
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
...ary/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+57
-31
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
...nsor_operation/cpu/reference_conv_fwd_bias_activation.hpp
+18
-10
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
..._operation/cpu/reference_conv_fwd_bias_activation_add.hpp
+18
-10
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
...library/reference_tensor_operation/cpu/reference_gemm.hpp
+3
-5
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
...reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation.hpp
...e_tensor_operation/cpu/reference_gemm_bias_activation.hpp
+2
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation_add.hpp
...nsor_operation/cpu/reference_gemm_bias_activation_add.hpp
+2
-1
library/include/ck/library/utility/conv_util.hpp
library/include/ck/library/utility/conv_util.hpp
+42
-42
library/include/ck/library/utility/op_instance_engine.hpp
library/include/ck/library/utility/op_instance_engine.hpp
+2
-2
library/src/host_tensor/CMakeLists.txt
library/src/host_tensor/CMakeLists.txt
+23
-2
library/src/host_tensor/device.cpp
library/src/host_tensor/device.cpp
+14
-15
library/src/host_tensor/host_tensor.cpp
library/src/host_tensor/host_tensor.cpp
+2
-2
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+72
-1
library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
...tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
+3
-3
library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
...operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
+3
-2
No files found.
library/include/ck/library/host_tensor/host_reduction.hpp
View file @
b134b7d6
...
@@ -211,7 +211,8 @@ struct ReductionHost
...
@@ -211,7 +211,8 @@ struct ReductionHost
AccDataType
accuVal
=
ReduceOpZeroVal
<
AccDataType
,
ReduceOpId
>
();
AccDataType
accuVal
=
ReduceOpZeroVal
<
AccDataType
,
ReduceOpId
>
();
IndexDataType
accuIndex
=
0
;
IndexDataType
accuIndex
=
0
;
for
(
IndexDataType
i
=
0
;
i
<
reduce_dim_indexes
.
size
();
i
++
)
for
(
IndexDataType
i
=
0
;
i
<
ck
::
type_convert
<
IndexDataType
>
(
reduce_dim_indexes
.
size
());
i
++
)
{
{
auto
offset_reduce
=
auto
offset_reduce
=
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
...
@@ -246,7 +247,9 @@ struct ReductionHost
...
@@ -246,7 +247,9 @@ struct ReductionHost
auto
offset_invariant
=
auto
offset_invariant
=
get_offset_from_index
<
NumInvariantDim
>
(
invariantStrides
,
invariant_index
);
get_offset_from_index
<
NumInvariantDim
>
(
invariantStrides
,
invariant_index
);
for
(
IndexDataType
i
=
0
;
i
<
reduce_dim_indexes
.
size
();
i
++
)
for
(
IndexDataType
i
=
0
;
i
<
ck
::
type_convert
<
IndexDataType
>
(
reduce_dim_indexes
.
size
());
i
++
)
{
{
auto
offset_reduce
=
auto
offset_reduce
=
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
...
...
library/include/ck/library/host_tensor/host_tensor.hpp
View file @
b134b7d6
...
@@ -154,7 +154,7 @@ struct ParallelTensorFunctor
...
@@ -154,7 +154,7 @@ struct ParallelTensorFunctor
{
{
std
::
array
<
std
::
size_t
,
NDIM
>
indices
;
std
::
array
<
std
::
size_t
,
NDIM
>
indices
;
for
(
in
t
idim
=
0
;
idim
<
NDIM
;
++
idim
)
for
(
std
::
size_
t
idim
=
0
;
idim
<
NDIM
;
++
idim
)
{
{
indices
[
idim
]
=
i
/
mStrides
[
idim
];
indices
[
idim
]
=
i
/
mStrides
[
idim
];
i
-=
indices
[
idim
]
*
mStrides
[
idim
];
i
-=
indices
[
idim
]
*
mStrides
[
idim
];
...
@@ -316,7 +316,7 @@ float check_error(const Tensor<T>& ref, const Tensor<T>& result)
...
@@ -316,7 +316,7 @@ float check_error(const Tensor<T>& ref, const Tensor<T>& result)
constexpr
float
eps
=
1e-10
;
constexpr
float
eps
=
1e-10
;
for
(
in
t
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
for
(
std
::
size_
t
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
{
{
float
ref_v
=
ck
::
type_convert
<
float
>
(
ref
.
mData
[
i
]);
float
ref_v
=
ck
::
type_convert
<
float
>
(
ref
.
mData
[
i
]);
float
result_v
=
ck
::
type_convert
<
float
>
(
result
.
mData
[
i
]);
float
result_v
=
ck
::
type_convert
<
float
>
(
result
.
mData
[
i
]);
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp
View file @
b134b7d6
...
@@ -84,7 +84,8 @@ struct ReferenceBatchedGemm : public device::BaseOperator
...
@@ -84,7 +84,8 @@ struct ReferenceBatchedGemm : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
View file @
b134b7d6
...
@@ -70,18 +70,25 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
...
@@ -70,18 +70,25 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
auto
f_kcyx
=
[
&
](
auto
k
,
auto
c
,
auto
y
,
auto
x
)
{
auto
f_kcyx
=
[
&
](
auto
k
,
auto
c
,
auto
y
,
auto
x
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
in
t
n
=
0
;
n
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
0
];
++
n
)
for
(
std
::
size_
t
n
=
0
;
n
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
0
];
++
n
)
{
{
for
(
in
t
ho
=
0
;
ho
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
2
];
++
ho
)
for
(
std
::
size_
t
ho
=
0
;
ho
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
2
];
++
ho
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
I0
]
+
y
*
arg
.
conv_dilations_
[
I0
]
-
auto
hi
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
I0
])
+
arg
.
in_left_pads_
[
I0
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
I0
])
-
for
(
int
wo
=
0
;
wo
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
3
];
++
wo
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
I0
]);
for
(
std
::
size_t
wo
=
0
;
wo
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
3
];
++
wo
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
I1
]
+
x
*
arg
.
conv_dilations_
[
I1
]
-
auto
wi
=
arg
.
in_left_pads_
[
I1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
I1
])
+
if
(
hi
>=
0
&&
hi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
I1
])
-
wi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
I1
]);
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
{
{
float
v_out
;
float
v_out
;
float
v_in
;
float
v_in
;
...
@@ -114,7 +121,8 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
...
@@ -114,7 +121,8 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/*stream_config*/
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
View file @
b134b7d6
...
@@ -78,15 +78,18 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -78,15 +78,18 @@ struct ReferenceConvBwdData : public device::BaseOperator
AccDataType
v_acc
=
0
;
AccDataType
v_acc
=
0
;
for
(
in
t
x
=
0
;
x
<
X
;
++
x
)
for
(
std
::
size_
t
x
=
0
;
x
<
X
;
++
x
)
{
{
int
w_tmp
=
wi
+
arg
.
in_left_pads_
[
0
]
-
x
*
arg
.
conv_dilations_
[
0
];
auto
w_tmp
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
wi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
]);
if
(
w_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
if
(
w_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
{
{
int
wo
=
w_tmp
/
arg
.
conv_strides_
[
0
];
auto
wo
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
w_tmp
)
/
if
(
wo
>=
0
&&
wo
<
Wo
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
{
{
for
(
in
t
k
=
0
;
k
<
K
;
++
k
)
for
(
std
::
size_
t
k
=
0
;
k
<
K
;
++
k
)
{
{
AccDataType
v_out
=
0
;
AccDataType
v_out
=
0
;
AccDataType
v_wei
=
0
;
AccDataType
v_wei
=
0
;
...
@@ -128,24 +131,32 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -128,24 +131,32 @@ struct ReferenceConvBwdData : public device::BaseOperator
AccDataType
v_acc
=
0
;
AccDataType
v_acc
=
0
;
for
(
in
t
y
=
0
;
y
<
Y
;
++
y
)
for
(
std
::
size_
t
y
=
0
;
y
<
Y
;
++
y
)
{
{
int
h_tmp
=
hi
+
arg
.
in_left_pads_
[
0
]
-
y
*
arg
.
conv_dilations_
[
0
];
auto
h_tmp
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
hi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
]);
if
(
h_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
if
(
h_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
{
{
int
ho
=
h_tmp
/
arg
.
conv_strides_
[
0
];
auto
ho
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
h_tmp
)
/
if
(
ho
>=
0
&&
ho
<
Ho
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
if
(
ho
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
ho
)
<
Ho
)
{
{
for
(
in
t
x
=
0
;
x
<
X
;
++
x
)
for
(
std
::
size_
t
x
=
0
;
x
<
X
;
++
x
)
{
{
int
w_tmp
=
auto
w_tmp
=
wi
+
arg
.
in_left_pads_
[
1
]
-
x
*
arg
.
conv_dilations_
[
1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
]);
if
(
w_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
if
(
w_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
{
{
int
wo
=
w_tmp
/
arg
.
conv_strides_
[
1
];
auto
wo
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
w_tmp
)
/
if
(
wo
>=
0
&&
wo
<
Wo
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
1
]);
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
{
{
for
(
in
t
k
=
0
;
k
<
K
;
++
k
)
for
(
std
::
size_
t
k
=
0
;
k
<
K
;
++
k
)
{
{
AccDataType
v_out
=
0
;
AccDataType
v_out
=
0
;
AccDataType
v_wei
=
0
;
AccDataType
v_wei
=
0
;
...
@@ -194,33 +205,49 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -194,33 +205,49 @@ struct ReferenceConvBwdData : public device::BaseOperator
AccDataType
v_acc
=
0
;
AccDataType
v_acc
=
0
;
for
(
in
t
z
=
0
;
z
<
Z
;
++
z
)
for
(
std
::
size_
t
z
=
0
;
z
<
Z
;
++
z
)
{
{
int
d_tmp
=
di
+
arg
.
in_left_pads_
[
0
]
-
z
*
arg
.
conv_dilations_
[
0
];
auto
d_tmp
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
di
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
]);
if
(
d_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
if
(
d_tmp
%
arg
.
conv_strides_
[
0
]
==
0
)
{
{
int
do_
=
d_tmp
/
arg
.
conv_strides_
[
0
];
auto
do_
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
d_tmp
)
/
if
(
do_
>=
0
&&
do_
<
Do
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
0
]);
if
(
do_
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
do_
)
<
Do
)
{
{
for
(
in
t
y
=
0
;
y
<
Y
;
++
y
)
for
(
std
::
size_
t
y
=
0
;
y
<
Y
;
++
y
)
{
{
int
h_tmp
=
auto
h_tmp
=
hi
+
arg
.
in_left_pads_
[
1
]
-
y
*
arg
.
conv_dilations_
[
1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
hi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
]);
if
(
h_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
if
(
h_tmp
%
arg
.
conv_strides_
[
1
]
==
0
)
{
{
int
ho
=
h_tmp
/
arg
.
conv_strides_
[
1
];
auto
ho
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
h_tmp
)
/
if
(
ho
>=
0
&&
ho
<
Ho
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
1
]);
if
(
ho
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
ho
)
<
Ho
)
{
{
for
(
in
t
x
=
0
;
x
<
X
;
++
x
)
for
(
std
::
size_
t
x
=
0
;
x
<
X
;
++
x
)
{
{
int
w_tmp
=
wi
+
arg
.
in_left_pads_
[
2
]
-
auto
w_tmp
=
x
*
arg
.
conv_dilations_
[
2
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wi
)
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
2
]);
if
(
w_tmp
%
arg
.
conv_strides_
[
2
]
==
0
)
if
(
w_tmp
%
arg
.
conv_strides_
[
2
]
==
0
)
{
{
int
wo
=
w_tmp
/
arg
.
conv_strides_
[
2
];
auto
wo
=
if
(
wo
>=
0
&&
wo
<
Wo
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
w_tmp
)
/
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
conv_strides_
[
2
]);
if
(
wo
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wo
)
<
Wo
)
{
{
for
(
in
t
k
=
0
;
k
<
K
;
++
k
)
for
(
std
::
size_
t
k
=
0
;
k
<
K
;
++
k
)
{
{
AccDataType
v_out
=
0
;
AccDataType
v_out
=
0
;
AccDataType
v_wei
=
0
;
AccDataType
v_wei
=
0
;
...
@@ -264,7 +291,8 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -264,7 +291,8 @@ struct ReferenceConvBwdData : public device::BaseOperator
}
}
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
View file @
b134b7d6
#ifndef REFERENCE_CONV_FWD_HPP
#pragma once
#define REFERENCE_CONV_FWD_HPP
#include <iostream>
#include <iostream>
#include <type_traits>
#include <type_traits>
#include <sstream>
#include <sstream>
#include "stream_config.hpp"
#include "device_base.hpp"
#include "device_base.hpp"
#include "host_tensor.hpp"
#include "host_tensor.hpp"
...
@@ -88,13 +89,16 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -88,13 +89,16 @@ struct ReferenceConvFwd : public device::BaseOperator
auto
f_ncw
=
[
&
](
auto
n
,
auto
k
,
auto
wo
)
{
auto
f_ncw
=
[
&
](
auto
n
,
auto
k
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
in
t
c
=
0
;
c
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
std
::
size_
t
c
=
0
;
c
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
{
{
for
(
in
t
x
=
0
;
x
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
2
];
++
x
)
for
(
std
::
size_
t
x
=
0
;
x
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
2
];
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
0
]
+
x
*
arg
.
conv_dilations_
[
0
]
-
auto
wi
=
arg
.
in_left_pads_
[
0
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
0
])
+
if
(
wi
>=
0
&&
wi
<
arg
.
input_
.
mDesc
.
GetLengths
()[
2
])
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
if
(
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
arg
.
input_
.
mDesc
.
GetLengths
()[
2
])
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
@@ -128,18 +132,26 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -128,18 +132,26 @@ struct ReferenceConvFwd : public device::BaseOperator
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
in
t
c
=
0
;
c
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
std
::
size_
t
c
=
0
;
c
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
{
{
for
(
in
t
y
=
0
;
y
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
for
(
std
::
size_
t
y
=
0
;
y
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
auto
hi
=
arg
.
in_left_pads_
[
0
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
for
(
int
x
=
0
;
x
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
x
=
0
;
x
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
auto
wi
=
arg
.
in_left_pads_
[
1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
if
(
hi
>=
0
&&
hi
<
arg
.
input_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
wi
<
arg
.
input_
.
mDesc
.
GetLengths
()[
3
])
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
arg
.
input_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
arg
.
input_
.
mDesc
.
GetLengths
()[
3
])
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
@@ -174,23 +186,37 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -174,23 +186,37 @@ struct ReferenceConvFwd : public device::BaseOperator
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
d_o
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
d_o
,
auto
ho
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
in
t
c
=
0
;
c
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
std
::
size_
t
c
=
0
;
c
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
{
{
for
(
in
t
z
=
0
;
z
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
2
];
++
z
)
for
(
std
::
size_
t
z
=
0
;
z
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
2
];
++
z
)
{
{
int
di
=
d_o
*
arg
.
conv_strides_
[
0
]
+
z
*
arg
.
conv_dilations_
[
0
]
-
auto
di
=
arg
.
in_left_pads_
[
0
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
d_o
*
arg
.
conv_strides_
[
0
])
+
for
(
int
y
=
0
;
y
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
3
];
++
y
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
z
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
y
=
0
;
y
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
3
];
++
y
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
1
]
+
y
*
arg
.
conv_dilations_
[
1
]
-
auto
hi
=
arg
.
in_left_pads_
[
1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
1
])
+
for
(
int
x
=
0
;
x
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
4
];
++
x
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
1
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
for
(
std
::
size_t
x
=
0
;
x
<
arg
.
weight_
.
mDesc
.
GetLengths
()[
4
];
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
2
]
+
auto
wi
=
x
*
arg
.
conv_dilations_
[
2
]
-
arg
.
in_left_pads_
[
2
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
if
(
di
>=
0
&&
di
<
arg
.
input_
.
mDesc
.
GetLengths
()[
2
]
&&
arg
.
conv_strides_
[
2
])
+
hi
>=
0
&&
hi
<
arg
.
input_
.
mDesc
.
GetLengths
()[
3
]
&&
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
wi
>=
0
&&
wi
<
arg
.
input_
.
mDesc
.
GetLengths
()[
4
])
arg
.
conv_dilations_
[
2
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
2
]);
if
(
di
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
di
)
<
arg
.
input_
.
mDesc
.
GetLengths
()[
2
]
&&
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
arg
.
input_
.
mDesc
.
GetLengths
()[
3
]
&&
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
arg
.
input_
.
mDesc
.
GetLengths
()[
4
])
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
@@ -226,7 +252,8 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -226,7 +252,8 @@ struct ReferenceConvFwd : public device::BaseOperator
}
}
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/*stream_config*/
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
@@ -286,4 +313,3 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -286,4 +313,3 @@ struct ReferenceConvFwd : public device::BaseOperator
}
// namespace host
}
// namespace host
}
// namespace tensor_operation
}
// namespace tensor_operation
}
// namespace ck
}
// namespace ck
#endif
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
View file @
b134b7d6
...
@@ -73,18 +73,25 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
...
@@ -73,18 +73,25 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
in
t
c
=
0
;
c
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
std
::
size_
t
c
=
0
;
c
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
{
{
for
(
in
t
y
=
0
;
y
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
for
(
std
::
size_
t
y
=
0
;
y
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
auto
hi
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
arg
.
in_left_pads_
[
0
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
for
(
int
x
=
0
;
x
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
x
=
0
;
x
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
auto
wi
=
arg
.
in_left_pads_
[
1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
if
(
hi
>=
0
&&
hi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
wi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
@@ -117,7 +124,8 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
...
@@ -117,7 +124,8 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
View file @
b134b7d6
...
@@ -76,18 +76,25 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
...
@@ -76,18 +76,25 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
in
t
c
=
0
;
c
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
std
::
size_
t
c
=
0
;
c
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
{
{
for
(
in
t
y
=
0
;
y
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
for
(
std
::
size_
t
y
=
0
;
y
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
auto
hi
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
arg
.
in_left_pads_
[
0
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
for
(
int
x
=
0
;
x
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
x
=
0
;
x
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
auto
wi
=
arg
.
in_left_pads_
[
1
];
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
if
(
hi
>=
0
&&
hi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
wi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
@@ -123,7 +130,8 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
...
@@ -123,7 +130,8 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/*stream_config*/
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
View file @
b134b7d6
#ifndef REFERENCE_GEMM_HPP
#pragma once
#define REFERENCE_GEMM_HPP
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include "device_base.hpp"
#include "device_base.hpp"
...
@@ -82,7 +80,8 @@ struct ReferenceGemm : public device::BaseOperator
...
@@ -82,7 +80,8 @@ struct ReferenceGemm : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
@@ -129,4 +128,3 @@ struct ReferenceGemm : public device::BaseOperator
...
@@ -129,4 +128,3 @@ struct ReferenceGemm : public device::BaseOperator
}
// namespace host
}
// namespace host
}
// namespace tensor_operation
}
// namespace tensor_operation
}
// namespace ck
}
// namespace ck
#endif
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
View file @
b134b7d6
...
@@ -82,7 +82,8 @@ struct ReferenceGemmBias2D : public device::BaseOperator
...
@@ -82,7 +82,8 @@ struct ReferenceGemmBias2D : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation.hpp
View file @
b134b7d6
...
@@ -85,7 +85,8 @@ struct ReferenceGemmBiasActivation : public device::BaseOperator
...
@@ -85,7 +85,8 @@ struct ReferenceGemmBiasActivation : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_activation_add.hpp
View file @
b134b7d6
...
@@ -91,7 +91,8 @@ struct ReferenceGemmBiasActivationAdd : public device::BaseOperator
...
@@ -91,7 +91,8 @@ struct ReferenceGemmBiasActivationAdd : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
const
StreamConfig
&
/* stream_config */
=
StreamConfig
{})
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/utility/conv_
fwd_
util.hpp
→
library/include/ck/library/utility/conv_util.hpp
View file @
b134b7d6
...
@@ -146,19 +146,19 @@ struct ConvParams
...
@@ -146,19 +146,19 @@ struct ConvParams
const
std
::
vector
<
ck
::
index_t
>&
left_pads
,
const
std
::
vector
<
ck
::
index_t
>&
left_pads
,
const
std
::
vector
<
ck
::
index_t
>&
right_pads
);
const
std
::
vector
<
ck
::
index_t
>&
right_pads
);
ck
::
index_t
num_dim_spatial
;
ck
::
index_t
num_dim_spatial
_
;
ck
::
index_t
N
;
ck
::
index_t
N
_
;
ck
::
index_t
K
;
ck
::
index_t
K
_
;
ck
::
index_t
C
;
ck
::
index_t
C
_
;
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
;
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
_
;
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
;
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
_
;
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
;
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
_
;
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
;
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
_
;
std
::
vector
<
ck
::
index_t
>
input_left_pads
;
std
::
vector
<
ck
::
index_t
>
input_left_pads
_
;
std
::
vector
<
ck
::
index_t
>
input_right_pads
;
std
::
vector
<
ck
::
index_t
>
input_right_pads
_
;
std
::
vector
<
ck
::
index_t
>
GetOutputSpatialLengths
()
const
;
std
::
vector
<
ck
::
index_t
>
GetOutputSpatialLengths
()
const
;
};
};
...
@@ -268,10 +268,10 @@ void run_reference_convolution_forward(const ConvParams& params,
...
@@ -268,10 +268,10 @@ void run_reference_convolution_forward(const ConvParams& params,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
input
,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
input
,
weights
,
weights
,
output
,
output
,
params
.
conv_filter_strides
,
params
.
conv_filter_strides
_
,
params
.
conv_filter_dilations
,
params
.
conv_filter_dilations
_
,
params
.
input_left_pads
,
params
.
input_left_pads
_
,
params
.
input_right_pads
,
params
.
input_right_pads
_
,
PassThrough
{},
PassThrough
{},
PassThrough
{},
PassThrough
{},
PassThrough
{});
PassThrough
{});
...
@@ -437,17 +437,17 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
...
@@ -437,17 +437,17 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
virtual
InTensorsTuple
GetInputTensors
()
const
override
virtual
InTensorsTuple
GetInputTensors
()
const
override
{
{
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
),
std
::
vector
<
std
::
size_t
>
input_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
_
),
static_cast
<
std
::
size_t
>
(
params_
.
C
)};
static_cast
<
std
::
size_t
>
(
params_
.
C
_
)};
input_dims
.
insert
(
std
::
end
(
input_dims
),
input_dims
.
insert
(
std
::
end
(
input_dims
),
std
::
begin
(
params_
.
input_spatial_lengths
),
std
::
begin
(
params_
.
input_spatial_lengths
_
),
std
::
end
(
params_
.
input_spatial_lengths
));
std
::
end
(
params_
.
input_spatial_lengths
_
));
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
K
),
std
::
vector
<
std
::
size_t
>
filter_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
K
_
),
static_cast
<
std
::
size_t
>
(
params_
.
C
)};
static_cast
<
std
::
size_t
>
(
params_
.
C
_
)};
filter_dims
.
insert
(
std
::
end
(
filter_dims
),
filter_dims
.
insert
(
std
::
end
(
filter_dims
),
std
::
begin
(
params_
.
filter_spatial_lengths
),
std
::
begin
(
params_
.
filter_spatial_lengths
_
),
std
::
end
(
params_
.
filter_spatial_lengths
));
std
::
end
(
params_
.
filter_spatial_lengths
_
));
auto
input
=
std
::
make_unique
<
Tensor
<
InDataType
>>
(
auto
input
=
std
::
make_unique
<
Tensor
<
InDataType
>>
(
get_host_tensor_descriptor
(
input_dims
,
InLayout
{}));
get_host_tensor_descriptor
(
input_dims
,
InLayout
{}));
...
@@ -465,8 +465,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
...
@@ -465,8 +465,8 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
virtual
TensorPtr
<
OutDataType
>
GetOutputTensor
()
const
override
virtual
TensorPtr
<
OutDataType
>
GetOutputTensor
()
const
override
{
{
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
),
std
::
vector
<
std
::
size_t
>
output_dims
{
static_cast
<
std
::
size_t
>
(
params_
.
N
_
),
static_cast
<
std
::
size_t
>
(
params_
.
K
)};
static_cast
<
std
::
size_t
>
(
params_
.
K
_
)};
output_dims
.
insert
(
std
::
end
(
output_dims
),
output_dims
.
insert
(
std
::
end
(
output_dims
),
std
::
begin
(
output_spatial_lengths_
),
std
::
begin
(
output_spatial_lengths_
),
std
::
end
(
output_spatial_lengths_
));
std
::
end
(
output_spatial_lengths_
));
...
@@ -522,16 +522,16 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
...
@@ -522,16 +522,16 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
static_cast
<
InDataType
*>
(
in_device_buffers
[
0
]
->
GetDeviceBuffer
()),
static_cast
<
InDataType
*>
(
in_device_buffers
[
0
]
->
GetDeviceBuffer
()),
static_cast
<
WeiDataType
*>
(
in_device_buffers
[
1
]
->
GetDeviceBuffer
()),
static_cast
<
WeiDataType
*>
(
in_device_buffers
[
1
]
->
GetDeviceBuffer
()),
static_cast
<
OutDataType
*>
(
out_device_buffer
->
GetDeviceBuffer
()),
static_cast
<
OutDataType
*>
(
out_device_buffer
->
GetDeviceBuffer
()),
params_
.
N
,
params_
.
N
_
,
params_
.
K
,
params_
.
K
_
,
params_
.
C
,
params_
.
C
_
,
params_
.
input_spatial_lengths
,
params_
.
input_spatial_lengths
_
,
params_
.
filter_spatial_lengths
,
params_
.
filter_spatial_lengths
_
,
output_spatial_lengths_
,
output_spatial_lengths_
,
params_
.
conv_filter_strides
,
params_
.
conv_filter_strides
_
,
params_
.
conv_filter_dilations
,
params_
.
conv_filter_dilations
_
,
params_
.
input_left_pads
,
params_
.
input_left_pads
_
,
params_
.
input_right_pads
,
params_
.
input_right_pads
_
,
InElementwiseOp
{},
InElementwiseOp
{},
WeiElementwiseOp
{},
WeiElementwiseOp
{},
OutElementwiseOp
{});
OutElementwiseOp
{});
...
@@ -539,20 +539,20 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
...
@@ -539,20 +539,20 @@ class ConvFwdOpInstance : public ck::utils::OpInstance<OutDataType, InDataType,
virtual
std
::
size_t
GetFlops
()
const
override
virtual
std
::
size_t
GetFlops
()
const
override
{
{
return
get_flops
(
params_
.
N
,
return
get_flops
(
params_
.
N
_
,
params_
.
C
,
params_
.
C
_
,
params_
.
K
,
params_
.
K
_
,
params_
.
filter_spatial_lengths
,
params_
.
filter_spatial_lengths
_
,
output_spatial_lengths_
);
output_spatial_lengths_
);
}
}
virtual
std
::
size_t
GetBtype
()
const
override
virtual
std
::
size_t
GetBtype
()
const
override
{
{
return
get_btype
<
InDataType
,
WeiDataType
,
OutDataType
>
(
params_
.
N
,
return
get_btype
<
InDataType
,
WeiDataType
,
OutDataType
>
(
params_
.
N
_
,
params_
.
C
,
params_
.
C
_
,
params_
.
K
,
params_
.
K
_
,
params_
.
input_spatial_lengths
,
params_
.
input_spatial_lengths
_
,
params_
.
filter_spatial_lengths
,
params_
.
filter_spatial_lengths
_
,
output_spatial_lengths_
);
output_spatial_lengths_
);
}
}
...
...
library/include/ck/library/utility/op_instance_engine.hpp
View file @
b134b7d6
...
@@ -128,7 +128,7 @@ class OpInstanceRunEngine
...
@@ -128,7 +128,7 @@ class OpInstanceRunEngine
template
<
typename
OpInstancePtr
>
template
<
typename
OpInstancePtr
>
ProfileBestConfig
Profile
(
const
std
::
vector
<
OpInstancePtr
>&
op_ptrs
,
ProfileBestConfig
Profile
(
const
std
::
vector
<
OpInstancePtr
>&
op_ptrs
,
int
nrepeat
=
100
,
bool
time_kernel
=
false
,
bool
do_verification
=
false
,
bool
do_verification
=
false
,
bool
do_log
=
false
)
bool
do_log
=
false
)
{
{
...
@@ -143,7 +143,7 @@ class OpInstanceRunEngine
...
@@ -143,7 +143,7 @@ class OpInstanceRunEngine
if
(
op_ptr
->
IsSupportedArgument
(
argument
.
get
()))
if
(
op_ptr
->
IsSupportedArgument
(
argument
.
get
()))
{
{
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
std
::
string
op_name
=
op_ptr
->
GetTypeString
();
float
avg_time
=
invoker
->
Run
(
argument
.
get
(),
nrepeat
);
float
avg_time
=
invoker
->
Run
(
argument
.
get
(),
StreamConfig
{
nullptr
,
time_kernel
}
);
std
::
size_t
flops
=
op_instance_
.
GetFlops
();
std
::
size_t
flops
=
op_instance_
.
GetFlops
();
std
::
size_t
num_btype
=
op_instance_
.
GetBtype
();
std
::
size_t
num_btype
=
op_instance_
.
GetBtype
();
...
...
library/src/host_tensor/CMakeLists.txt
View file @
b134b7d6
...
@@ -10,10 +10,31 @@ set(HOST_TENSOR_SOURCE
...
@@ -10,10 +10,31 @@ set(HOST_TENSOR_SOURCE
host_tensor.cpp
host_tensor.cpp
)
)
add_library
(
host_tensor SHARED
${
HOST_TENSOR_SOURCE
}
)
add_library
(
host_tensor STATIC
${
HOST_TENSOR_SOURCE
}
)
add_library
(
composable_kernel::host_tensor ALIAS host_tensor
)
target_compile_features
(
host_tensor PUBLIC
)
target_compile_features
(
host_tensor PUBLIC
)
set_target_properties
(
host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
host_tensor PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:
${
HALF_INCLUDE_DIR
}
>
)
target_include_directories
(
host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:
${
HALF_INCLUDE_DIR
}
>
)
install
(
TARGETS host_tensor LIBRARY DESTINATION lib
)
target_include_directories
(
host_tensor PUBLIC
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/utility>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host_tensor>"
)
install
(
TARGETS host_tensor
EXPORT host_tensorTargets
LIBRARY DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
ARCHIVE DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
RUNTIME DESTINATION
${
CMAKE_INSTALL_BINDIR
}
INCLUDES DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
install
(
EXPORT host_tensorTargets
FILE composable_kernelhost_tensorTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
clang_tidy_check
(
host_tensor
)
clang_tidy_check
(
host_tensor
)
library/src/host_tensor/device.cpp
View file @
b134b7d6
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
DeviceMem
::
DeviceMem
(
std
::
size_t
mem_size
)
:
mMemSize
(
mem_size
)
DeviceMem
::
DeviceMem
(
std
::
size_t
mem_size
)
:
mMemSize
(
mem_size
)
{
{
hip
GetErrorString
(
hipMalloc
(
static_cast
<
void
**>
(
&
mpDeviceBuf
),
mMemSize
));
hip
_check_error
(
hipMalloc
(
static_cast
<
void
**>
(
&
mpDeviceBuf
),
mMemSize
));
}
}
void
*
DeviceMem
::
GetDeviceBuffer
()
{
return
mpDeviceBuf
;
}
void
*
DeviceMem
::
GetDeviceBuffer
()
{
return
mpDeviceBuf
;
}
...
@@ -12,18 +12,17 @@ std::size_t DeviceMem::GetBufferSize() { return mMemSize; }
...
@@ -12,18 +12,17 @@ std::size_t DeviceMem::GetBufferSize() { return mMemSize; }
void
DeviceMem
::
ToDevice
(
const
void
*
p
)
void
DeviceMem
::
ToDevice
(
const
void
*
p
)
{
{
hipGetErrorString
(
hip_check_error
(
hipMemcpy
(
mpDeviceBuf
,
const_cast
<
void
*>
(
p
),
mMemSize
,
hipMemcpyHostToDevice
));
hipMemcpy
(
mpDeviceBuf
,
const_cast
<
void
*>
(
p
),
mMemSize
,
hipMemcpyHostToDevice
));
}
}
void
DeviceMem
::
FromDevice
(
void
*
p
)
void
DeviceMem
::
FromDevice
(
void
*
p
)
{
{
hip
GetErrorString
(
hipMemcpy
(
p
,
mpDeviceBuf
,
mMemSize
,
hipMemcpyDeviceToHost
));
hip
_check_error
(
hipMemcpy
(
p
,
mpDeviceBuf
,
mMemSize
,
hipMemcpyDeviceToHost
));
}
}
void
DeviceMem
::
SetZero
()
{
hip
GetErrorString
(
hipMemset
(
mpDeviceBuf
,
0
,
mMemSize
));
}
void
DeviceMem
::
SetZero
()
{
hip
_check_error
(
hipMemset
(
mpDeviceBuf
,
0
,
mMemSize
));
}
DeviceMem
::~
DeviceMem
()
{
hip
GetErrorString
(
hipFree
(
mpDeviceBuf
));
}
DeviceMem
::~
DeviceMem
()
{
hip
_check_error
(
hipFree
(
mpDeviceBuf
));
}
DeviceAlignedMemCPU
::
DeviceAlignedMemCPU
(
std
::
size_t
mem_size
,
std
::
size_t
alignment
)
DeviceAlignedMemCPU
::
DeviceAlignedMemCPU
(
std
::
size_t
mem_size
,
std
::
size_t
alignment
)
:
mMemSize
(
mem_size
),
mAlignment
(
alignment
)
:
mMemSize
(
mem_size
),
mAlignment
(
alignment
)
...
@@ -68,32 +67,32 @@ struct KernelTimerImpl
...
@@ -68,32 +67,32 @@ struct KernelTimerImpl
{
{
KernelTimerImpl
()
KernelTimerImpl
()
{
{
hip
GetErrorString
(
hipEventCreate
(
&
mStart
));
hip
_check_error
(
hipEventCreate
(
&
mStart
));
hip
GetErrorString
(
hipEventCreate
(
&
mEnd
));
hip
_check_error
(
hipEventCreate
(
&
mEnd
));
}
}
~
KernelTimerImpl
()
~
KernelTimerImpl
()
{
{
hip
GetErrorString
(
hipEventDestroy
(
mStart
));
hip
_check_error
(
hipEventDestroy
(
mStart
));
hip
GetErrorString
(
hipEventDestroy
(
mEnd
));
hip
_check_error
(
hipEventDestroy
(
mEnd
));
}
}
void
Start
()
void
Start
()
{
{
hip
GetErrorString
(
hipDeviceSynchronize
());
hip
_check_error
(
hipDeviceSynchronize
());
hip
GetErrorString
(
hipEventRecord
(
mStart
,
nullptr
));
hip
_check_error
(
hipEventRecord
(
mStart
,
nullptr
));
}
}
void
End
()
void
End
()
{
{
hip
GetErrorString
(
hipEventRecord
(
mEnd
,
nullptr
));
hip
_check_error
(
hipEventRecord
(
mEnd
,
nullptr
));
hip
GetErrorString
(
hipEventSynchronize
(
mEnd
));
hip
_check_error
(
hipEventSynchronize
(
mEnd
));
}
}
float
GetElapsedTime
()
const
float
GetElapsedTime
()
const
{
{
float
time
;
float
time
;
hip
GetErrorString
(
hipEventElapsedTime
(
&
time
,
mStart
,
mEnd
));
hip
_check_error
(
hipEventElapsedTime
(
&
time
,
mStart
,
mEnd
));
return
time
;
return
time
;
}
}
...
...
library/src/host_tensor/host_tensor.cpp
View file @
b134b7d6
...
@@ -25,7 +25,7 @@ std::size_t HostTensorDescriptor::GetElementSize() const
...
@@ -25,7 +25,7 @@ std::size_t HostTensorDescriptor::GetElementSize() const
std
::
size_t
HostTensorDescriptor
::
GetElementSpace
()
const
std
::
size_t
HostTensorDescriptor
::
GetElementSpace
()
const
{
{
std
::
size_t
space
=
1
;
std
::
size_t
space
=
1
;
for
(
in
t
i
=
0
;
i
<
mLens
.
size
();
++
i
)
for
(
std
::
size_
t
i
=
0
;
i
<
mLens
.
size
();
++
i
)
{
{
space
+=
(
mLens
[
i
]
-
1
)
*
mStrides
[
i
];
space
+=
(
mLens
[
i
]
-
1
)
*
mStrides
[
i
];
}
}
...
@@ -68,7 +68,7 @@ void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream
...
@@ -68,7 +68,7 @@ void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream
// FIXME: remove
// FIXME: remove
void
bf16_to_f32_
(
const
Tensor
<
ck
::
bhalf_t
>&
src
,
Tensor
<
float
>&
dst
)
void
bf16_to_f32_
(
const
Tensor
<
ck
::
bhalf_t
>&
src
,
Tensor
<
float
>&
dst
)
{
{
for
(
in
t
i
=
0
;
i
<
src
.
mData
.
size
();
++
i
)
for
(
std
::
size_
t
i
=
0
;
i
<
src
.
mData
.
size
();
++
i
)
dst
.
mData
[
i
]
=
ck
::
type_convert
<
float
>
(
src
.
mData
[
i
]);
dst
.
mData
[
i
]
=
ck
::
type_convert
<
float
>
(
src
.
mData
[
i
]);
}
}
#endif
#endif
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
b134b7d6
...
@@ -11,6 +11,7 @@ include_directories(BEFORE
...
@@ -11,6 +11,7 @@ include_directories(BEFORE
${
PROJECT_SOURCE_DIR
}
/include/ck/tensor_operation/gpu/thread
${
PROJECT_SOURCE_DIR
}
/include/ck/tensor_operation/gpu/thread
${
PROJECT_SOURCE_DIR
}
/include/ck/tensor_operation/gpu/element
${
PROJECT_SOURCE_DIR
}
/include/ck/tensor_operation/gpu/element
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host_tensor
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host_tensor
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/host
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/tensor_operation_instance
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/tensor_operation_instance
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/tensor_operation_instance/gpu/reduce
${
PROJECT_SOURCE_DIR
}
/library/include/ck/library/tensor_operation_instance/gpu/reduce
${
PROJECT_SOURCE_DIR
}
/external/include/half
${
PROJECT_SOURCE_DIR
}
/external/include/half
...
@@ -18,7 +19,7 @@ include_directories(BEFORE
...
@@ -18,7 +19,7 @@ include_directories(BEFORE
function
(
add_instance_library INSTANCE_NAME
)
function
(
add_instance_library INSTANCE_NAME
)
message
(
"adding instance
${
INSTANCE_NAME
}
"
)
message
(
"adding instance
${
INSTANCE_NAME
}
"
)
add_library
(
${
INSTANCE_NAME
}
SHARED
${
ARGN
}
)
add_library
(
${
INSTANCE_NAME
}
OBJECT
${
ARGN
}
)
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
set_target_properties
(
${
INSTANCE_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
${
INSTANCE_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE ON
)
endfunction
(
add_instance_library INSTANCE_NAME
)
endfunction
(
add_instance_library INSTANCE_NAME
)
...
@@ -41,3 +42,73 @@ add_subdirectory(convnd_bwd_data)
...
@@ -41,3 +42,73 @@ add_subdirectory(convnd_bwd_data)
add_subdirectory
(
grouped_gemm
)
add_subdirectory
(
grouped_gemm
)
add_subdirectory
(
conv2d_bwd_weight
)
add_subdirectory
(
conv2d_bwd_weight
)
add_subdirectory
(
batched_gemm_reduce
)
add_subdirectory
(
batched_gemm_reduce
)
add_library
(
device_operations STATIC
$<TARGET_OBJECTS:device_conv1d_fwd_instance>
$<TARGET_OBJECTS:device_batched_gemm_instance>
$<TARGET_OBJECTS:device_conv2d_bwd_data_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_bias_relu_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_bias_relu_add_instance>
$<TARGET_OBJECTS:device_conv2d_fwd_bias_relu_atomic_add_instance>
$<TARGET_OBJECTS:device_gemm_instance>
$<TARGET_OBJECTS:device_gemm_bias_relu_instance>
$<TARGET_OBJECTS:device_gemm_bias_relu_add_instance>
$<TARGET_OBJECTS:device_gemm_bias2d_instance>
$<TARGET_OBJECTS:device_reduce_instance>
$<TARGET_OBJECTS:device_convnd_bwd_data_instance>
$<TARGET_OBJECTS:device_grouped_gemm_instance>
$<TARGET_OBJECTS:device_conv2d_bwd_weight_instance>
$<TARGET_OBJECTS:device_batched_gemm_reduce_instance>
$<TARGET_OBJECTS:device_conv3d_fwd_instance>
device_conv2d.cpp
)
add_library
(
composablekernels::device_operations ALIAS device_operations
)
set
(
DEV_OPS_INC_DIRS
${
PROJECT_SOURCE_DIR
}
/include/ck/
${
PROJECT_SOURCE_DIR
}
/library/include/ck/
${
PROJECT_SOURCE_DIR
}
/external/include/
)
target_compile_features
(
device_operations PUBLIC
)
set_target_properties
(
device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
device_operations PUBLIC
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/utility>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_description>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/problem_transform>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/device>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/grid>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/block>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/warp>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/thread>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/tensor_operation/gpu/element>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host_tensor>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/tensor_operation_instance>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/tensor_operation_instance/gpu/reduce>
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/half>
)
#once new arches are enabled make this an option on the main cmake file
# and pass down here to be exported
target_compile_options
(
device_operations
PRIVATE --offload-arch=gfx908
)
# install(TARGETS device_operations LIBRARY DESTINATION lib)
install
(
TARGETS device_operations
EXPORT device_operationsTargets
LIBRARY DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
ARCHIVE DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
RUNTIME DESTINATION
${
CMAKE_INSTALL_BINDIR
}
INCLUDES DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
install
(
DIRECTORY
${
DEV_OPS_INC_DIRS
}
DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck
)
install
(
EXPORT device_operationsTargets
FILE composable_kerneldevice_operationsTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt
View file @
b134b7d6
...
@@ -18,9 +18,9 @@ set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
...
@@ -18,9 +18,9 @@ set(DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp;
device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp;
)
)
add_library
(
device_batched_gemm_instance
SHARED
${
DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
}
)
add_library
(
device_batched_gemm_instance
OBJECT
${
DEVICE_BATCHED_GEMM_INSTANCE_SOURCE
}
)
target_compile_features
(
device_batched_gemm_instance PUBLIC
)
#
target_compile_features(device_batched_gemm_instance PUBLIC)
set_target_properties
(
device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
device_batched_gemm_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
install
(
TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib
)
#
install(TARGETS device_batched_gemm_instance LIBRARY DESTINATION lib)
clang_tidy_check
(
device_batched_gemm_instance
)
clang_tidy_check
(
device_batched_gemm_instance
)
library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt
View file @
b134b7d6
...
@@ -5,7 +5,8 @@ set(DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
...
@@ -5,7 +5,8 @@ set(DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp
device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp
)
)
add_instance_library
(
device_batched_gemm_reduce_instance
${
DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
}
)
add_instance_library
(
device_batched_gemm_reduce_instance OBJECT
${
DEVICE_BATCHED_GEMM_REDUCE_INSTANCE_SOURCE
}
)
install
(
TARGETS device_batched_gemm_reduce_instance LIBRARY DESTINATION lib
)
target_compile_features
(
device_batched_gemm_reduce_instance PUBLIC
)
set_target_properties
(
device_batched_gemm_reduce_instance PROPERTIES POSITION_INDEPENDENT_CODE ON
)
clang_tidy_check
(
device_batched_gemm_reduce_instance
)
clang_tidy_check
(
device_batched_gemm_reduce_instance
)
Prev
1
2
3
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment