Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b411ee3b
"vscode:/vscode.git/clone" did not exist on "47cb75a3f7ae295ea6fb2f7ab856962aaabedac3"
Commit
b411ee3b
authored
Apr 25, 2022
by
myamlak
Browse files
Cleaning part II
parent
c58d92d3
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
55 additions
and
54 deletions
+55
-54
example/12_reduce/reduce_blockwise.cpp
example/12_reduce/reduce_blockwise.cpp
+1
-1
example/13_pool2d_fwd/pool2d_fwd.cpp
example/13_pool2d_fwd/pool2d_fwd.cpp
+2
-2
example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp
example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp
+3
-3
include/ck/tensor_operation/gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
.../gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp
...u/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp
+1
-1
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
...ation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
+4
-4
include/ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp
...k/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp
+6
-5
library/include/ck/library/host_tensor/host_reduction.hpp
library/include/ck/library/host_tensor/host_reduction.hpp
+2
-2
library/include/ck/library/host_tensor/host_tensor.hpp
library/include/ck/library/host_tensor/host_tensor.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
...e_tensor_operation/cpu/reference_conv_backward_weight.hpp
+5
-5
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
...nsor_operation/cpu/reference_conv_fwd_bias_activation.hpp
+5
-5
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
..._operation/cpu/reference_conv_fwd_bias_activation_add.hpp
+5
-5
library/src/host_tensor/host_tensor.cpp
library/src/host_tensor/host_tensor.cpp
+2
-2
profiler/include/profile_convnd_bwd_data_impl.hpp
profiler/include/profile_convnd_bwd_data_impl.hpp
+5
-5
profiler/include/profile_grouped_gemm_impl.hpp
profiler/include/profile_grouped_gemm_impl.hpp
+5
-5
profiler/src/profile_reduce.cpp
profiler/src/profile_reduce.cpp
+1
-1
test/gemm_split_k/gemm_split_k.cpp
test/gemm_split_k/gemm_split_k.cpp
+1
-1
test/grouped_gemm/grouped_gemm_fp16.cpp
test/grouped_gemm/grouped_gemm_fp16.cpp
+3
-3
test/reduce/reduce_no_index.cpp
test/reduce/reduce_no_index.cpp
+1
-1
test/reduce/reduce_util.hpp
test/reduce/reduce_util.hpp
+1
-1
No files found.
example/12_reduce/reduce_blockwise.cpp
View file @
b411ee3b
...
@@ -140,7 +140,7 @@ class SimpleAppArgs
...
@@ -140,7 +140,7 @@ class SimpleAppArgs
int
processArgs
(
int
argc
,
char
*
argv
[])
int
processArgs
(
int
argc
,
char
*
argv
[])
{
{
unsigned
int
ch
;
int
ch
;
while
(
1
)
while
(
1
)
{
{
...
...
example/13_pool2d_fwd/pool2d_fwd.cpp
View file @
b411ee3b
...
@@ -80,8 +80,8 @@ static void pool_host_verify(const Tensor<InDataType>& in,
...
@@ -80,8 +80,8 @@ static void pool_host_verify(const Tensor<InDataType>& in,
for
(
int
x
=
0
;
x
<
window_spatial_lengths
[
1
];
++
x
)
for
(
int
x
=
0
;
x
<
window_spatial_lengths
[
1
];
++
x
)
{
{
int
wi
=
wo
*
window_strides
[
1
]
+
x
-
in_left_pads
[
1
];
int
wi
=
wo
*
window_strides
[
1
]
+
x
-
in_left_pads
[
1
];
if
(
hi
>=
0
&&
hi
<
in
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
if
(
hi
>=
0
&&
hi
<
ck
::
type_convert
<
int
>
(
in
.
mDesc
.
GetLengths
()[
2
]
)
&&
wi
>=
0
&&
wi
<
in
.
mDesc
.
GetLengths
()[
3
])
wi
<
ck
::
type_convert
<
int
>
(
in
.
mDesc
.
GetLengths
()[
3
])
)
{
{
AccDataType
currVal
=
static_cast
<
AccDataType
>
(
in
(
n
,
c
,
hi
,
wi
));
AccDataType
currVal
=
static_cast
<
AccDataType
>
(
in
(
n
,
c
,
hi
,
wi
));
...
...
example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp
View file @
b411ee3b
...
@@ -131,7 +131,7 @@ int main(int argc, char* argv[])
...
@@ -131,7 +131,7 @@ int main(int argc, char* argv[])
std
::
size_t
flop
=
0
,
num_btype
=
0
;
std
::
size_t
flop
=
0
,
num_btype
=
0
;
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
a_tensors
.
push_back
(
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
a_tensors
.
push_back
(
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
gemm_shapes
[
i
].
M
,
gemm_shapes
[
i
].
K
,
gemm_shapes
[
i
].
StrideA
,
ALayout
{})));
gemm_shapes
[
i
].
M
,
gemm_shapes
[
i
].
K
,
gemm_shapes
[
i
].
StrideA
,
ALayout
{})));
...
@@ -168,7 +168,7 @@ int main(int argc, char* argv[])
...
@@ -168,7 +168,7 @@ int main(int argc, char* argv[])
}
}
}
}
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
a_tensors_device
.
emplace_back
(
a_tensors_device
.
emplace_back
(
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_tensors
[
i
].
mDesc
.
GetElementSpace
()));
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_tensors
[
i
].
mDesc
.
GetElementSpace
()));
...
@@ -213,7 +213,7 @@ int main(int argc, char* argv[])
...
@@ -213,7 +213,7 @@ int main(int argc, char* argv[])
if
(
do_verification
)
if
(
do_verification
)
{
{
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
c_tensors_device
[
i
]
->
FromDevice
(
c_device_tensors
[
i
].
mData
.
data
());
c_tensors_device
[
i
]
->
FromDevice
(
c_device_tensors
[
i
].
mData
.
data
());
auto
ref_gemm
=
ReferenceGemmInstance
{};
auto
ref_gemm
=
ReferenceGemmInstance
{};
...
...
include/ck/tensor_operation/gpu/device/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp
View file @
b411ee3b
...
@@ -698,7 +698,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -698,7 +698,7 @@ struct DeviceConv2dBwdDataXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
}
}
// Gridwise GEMM size
// Gridwise GEMM size
for
(
int
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
arg
.
a_grid_desc_k0_m_k1_container_
.
size
()
)
;
i
++
)
{
{
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_k0_m_k1_container_
[
i
],
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_k0_m_k1_container_
[
i
],
arg
.
b_grid_desc_k0_n_k1_container_
[
i
],
arg
.
b_grid_desc_k0_n_k1_container_
[
i
],
...
...
include/ck/tensor_operation/gpu/device/device_convnd_bwd_data_xdl_ndhwc_kzyxc_ndhwk.hpp
View file @
b411ee3b
...
@@ -1413,7 +1413,7 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
...
@@ -1413,7 +1413,7 @@ struct DeviceConvndBwdDataXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho
}
}
// Gridwise GEMM size
// Gridwise GEMM size
for
(
int
i
=
0
;
i
<
arg
.
a_grid_desc_k0_m_k1_container_
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
arg
.
a_grid_desc_k0_m_k1_container_
.
size
()
)
;
i
++
)
{
{
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_k0_m_k1_container_
[
i
],
if
(
!
GridwiseGemm
::
CheckValidity
(
arg
.
a_grid_desc_k0_m_k1_container_
[
i
],
arg
.
b_grid_desc_k0_n_k1_container_
[
i
],
arg
.
b_grid_desc_k0_n_k1_container_
[
i
],
...
...
include/ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
b411ee3b
...
@@ -862,17 +862,17 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
...
@@ -862,17 +862,17 @@ struct DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
{
// Input tensors can't be bigger than 2GB each.
// Input tensors can't be bigger than 2GB each.
constexpr
std
::
size_t
GB2
=
2
*
1e9
;
constexpr
std
::
size_t
GB2
=
1e9
;
if
(
arg
.
a_grid_desc_k0_m_k1_
.
GetElementSpaceSize
()
>
GB2
)
if
(
ck
::
type_convert
<
std
::
size_t
>
(
arg
.
a_grid_desc_k0_m_k1_
.
GetElementSpaceSize
()
)
>
GB2
)
{
{
return
false
;
return
false
;
}
}
if
(
arg
.
b_grid_desc_k0_n_k1_
.
GetElementSpaceSize
()
>
GB2
)
if
(
ck
::
type_convert
<
std
::
size_t
>
(
arg
.
b_grid_desc_k0_n_k1_
.
GetElementSpaceSize
()
)
>
GB2
)
{
{
return
false
;
return
false
;
}
}
if
(
arg
.
c_grid_desc_m_n_
.
GetElementSpaceSize
()
>
GB2
)
if
(
ck
::
type_convert
<
std
::
size_t
>
(
arg
.
c_grid_desc_m_n_
.
GetElementSpaceSize
()
)
>
GB2
)
{
{
return
false
;
return
false
;
}
}
...
...
include/ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp
View file @
b411ee3b
...
@@ -290,17 +290,18 @@ struct DeviceGroupedGemmXdl
...
@@ -290,17 +290,18 @@ struct DeviceGroupedGemmXdl
{
{
grid_size_
=
0
;
grid_size_
=
0
;
group_count_
=
static_cast
<
int
>
(
gemm_shapes
.
size
());
group_count_
=
ck
::
type_convert
<
ck
::
index_t
>
(
gemm_shapes
.
size
());
if
(
!
(
group_count_
==
p_a
.
size
()
&&
group_count_
==
p_b
.
size
()
&&
if
(
!
(
group_count_
==
ck
::
type_convert
<
ck
::
index_t
>
(
p_a
.
size
())
&&
group_count_
==
p_c
.
size
()))
group_count_
==
ck
::
type_convert
<
ck
::
index_t
>
(
p_b
.
size
())
&&
group_count_
==
ck
::
type_convert
<
ck
::
index_t
>
(
p_c
.
size
())))
{
{
throw
std
::
runtime_error
(
"wrong! group_count_ != P_a/b/c.size"
);
throw
std
::
runtime_error
(
"wrong! group_count_ != P_a/b/c.size"
);
}
}
gemm_desc_kernel_arg_
.
reserve
(
group_count_
);
gemm_desc_kernel_arg_
.
reserve
(
group_count_
);
for
(
index_t
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
index_t
i
=
0
;
i
<
ck
::
type_convert
<
index_t
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
const
index_t
M
=
gemm_shapes
[
i
].
M
;
const
index_t
M
=
gemm_shapes
[
i
].
M
;
const
index_t
N
=
gemm_shapes
[
i
].
N
;
const
index_t
N
=
gemm_shapes
[
i
].
N
;
...
@@ -487,7 +488,7 @@ struct DeviceGroupedGemmXdl
...
@@ -487,7 +488,7 @@ struct DeviceGroupedGemmXdl
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
{
if
(
arg
.
gemm_desc_kernel_arg_
.
size
()
!=
arg
.
group_count_
)
if
(
ck
::
type_convert
<
ck
::
index_t
>
(
arg
.
gemm_desc_kernel_arg_
.
size
()
)
!=
arg
.
group_count_
)
return
false
;
return
false
;
else
else
return
true
;
return
true
;
...
...
library/include/ck/library/host_tensor/host_reduction.hpp
View file @
b411ee3b
...
@@ -211,7 +211,7 @@ struct ReductionHost
...
@@ -211,7 +211,7 @@ struct ReductionHost
AccDataType
accuVal
=
ReduceOpZeroVal
<
AccDataType
,
ReduceOpId
>
();
AccDataType
accuVal
=
ReduceOpZeroVal
<
AccDataType
,
ReduceOpId
>
();
IndexDataType
accuIndex
=
0
;
IndexDataType
accuIndex
=
0
;
for
(
IndexDataType
i
=
0
;
i
<
reduce_dim_indexes
.
size
();
i
++
)
for
(
IndexDataType
i
=
0
;
i
<
ck
::
type_convert
<
IndexDataType
>
(
reduce_dim_indexes
.
size
()
)
;
i
++
)
{
{
auto
offset_reduce
=
auto
offset_reduce
=
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
...
@@ -246,7 +246,7 @@ struct ReductionHost
...
@@ -246,7 +246,7 @@ struct ReductionHost
auto
offset_invariant
=
auto
offset_invariant
=
get_offset_from_index
<
NumInvariantDim
>
(
invariantStrides
,
invariant_index
);
get_offset_from_index
<
NumInvariantDim
>
(
invariantStrides
,
invariant_index
);
for
(
IndexDataType
i
=
0
;
i
<
reduce_dim_indexes
.
size
();
i
++
)
for
(
IndexDataType
i
=
0
;
i
<
ck
::
type_convert
<
IndexDataType
>
(
reduce_dim_indexes
.
size
()
)
;
i
++
)
{
{
auto
offset_reduce
=
auto
offset_reduce
=
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
get_offset_from_index
<
NumReduceDim
>
(
reduceStrides
,
reduce_dim_indexes
[
i
]);
...
...
library/include/ck/library/host_tensor/host_tensor.hpp
View file @
b411ee3b
...
@@ -316,7 +316,7 @@ float check_error(const Tensor<T>& ref, const Tensor<T>& result)
...
@@ -316,7 +316,7 @@ float check_error(const Tensor<T>& ref, const Tensor<T>& result)
constexpr
float
eps
=
1e-10
;
constexpr
float
eps
=
1e-10
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
ref
.
mData
.
size
()
)
;
++
i
)
{
{
float
ref_v
=
ck
::
type_convert
<
float
>
(
ref
.
mData
[
i
]);
float
ref_v
=
ck
::
type_convert
<
float
>
(
ref
.
mData
[
i
]);
float
result_v
=
ck
::
type_convert
<
float
>
(
result
.
mData
[
i
]);
float
result_v
=
ck
::
type_convert
<
float
>
(
result
.
mData
[
i
]);
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
View file @
b411ee3b
...
@@ -70,18 +70,18 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
...
@@ -70,18 +70,18 @@ struct ReferenceConvBwdWeight : public device::BaseOperator
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
auto
f_kcyx
=
[
&
](
auto
k
,
auto
c
,
auto
y
,
auto
x
)
{
auto
f_kcyx
=
[
&
](
auto
k
,
auto
c
,
auto
y
,
auto
x
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
int
n
=
0
;
n
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
0
];
++
n
)
for
(
int
n
=
0
;
n
<
ck
::
type_convert
<
int
>
(
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
0
]
)
;
++
n
)
{
{
for
(
int
ho
=
0
;
ho
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
2
];
++
ho
)
for
(
int
ho
=
0
;
ho
<
ck
::
type_convert
<
int
>
(
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
2
]
)
;
++
ho
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
I0
]
+
y
*
arg
.
conv_dilations_
[
I0
]
-
int
hi
=
ho
*
arg
.
conv_strides_
[
I0
]
+
y
*
arg
.
conv_dilations_
[
I0
]
-
arg
.
in_left_pads_
[
I0
];
arg
.
in_left_pads_
[
I0
];
for
(
int
wo
=
0
;
wo
<
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
3
];
++
wo
)
for
(
int
wo
=
0
;
wo
<
ck
::
type_convert
<
int
>
(
arg
.
out_n_k_ho_wo_
.
mDesc
.
GetLengths
()[
3
]
)
;
++
wo
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
I1
]
+
x
*
arg
.
conv_dilations_
[
I1
]
-
int
wi
=
wo
*
arg
.
conv_strides_
[
I1
]
+
x
*
arg
.
conv_dilations_
[
I1
]
-
arg
.
in_left_pads_
[
I1
];
arg
.
in_left_pads_
[
I1
];
if
(
hi
>=
0
&&
hi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
if
(
hi
>=
0
&&
hi
<
ck
::
type_convert
<
int
>
(
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
)
&&
wi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
wi
>=
0
&&
wi
<
ck
::
type_convert
<
int
>
(
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
)
{
{
float
v_out
;
float
v_out
;
float
v_in
;
float
v_in
;
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
View file @
b411ee3b
...
@@ -73,18 +73,18 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
...
@@ -73,18 +73,18 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
int
c
=
0
;
c
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
int
c
=
0
;
c
<
ck
::
type_convert
<
int
>
(
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
]
)
;
++
c
)
{
{
for
(
int
y
=
0
;
y
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
for
(
int
y
=
0
;
y
<
ck
::
type_convert
<
int
>
(
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
]
)
;
++
y
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
arg
.
in_left_pads_
[
0
];
arg
.
in_left_pads_
[
0
];
for
(
int
x
=
0
;
x
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
for
(
int
x
=
0
;
x
<
ck
::
type_convert
<
int
>
(
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
]
)
;
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
arg
.
in_left_pads_
[
1
];
arg
.
in_left_pads_
[
1
];
if
(
hi
>=
0
&&
hi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
if
(
hi
>=
0
&&
hi
<
ck
::
type_convert
<
int
>
(
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
)
&&
wi
>=
0
&&
wi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
wi
<
ck
::
type_convert
<
int
>
(
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
)
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
View file @
b411ee3b
...
@@ -76,18 +76,18 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
...
@@ -76,18 +76,18 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
auto
f_nchw
=
[
&
](
auto
n
,
auto
k
,
auto
ho
,
auto
wo
)
{
float
v_acc
=
0
;
float
v_acc
=
0
;
for
(
int
c
=
0
;
c
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
];
++
c
)
for
(
int
c
=
0
;
c
<
ck
::
type_convert
<
int
>
(
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
1
]
)
;
++
c
)
{
{
for
(
int
y
=
0
;
y
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
];
++
y
)
for
(
int
y
=
0
;
y
<
ck
::
type_convert
<
int
>
(
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
2
]
)
;
++
y
)
{
{
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
int
hi
=
ho
*
arg
.
conv_strides_
[
0
]
+
y
*
arg
.
conv_dilations_
[
0
]
-
arg
.
in_left_pads_
[
0
];
arg
.
in_left_pads_
[
0
];
for
(
int
x
=
0
;
x
<
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
];
++
x
)
for
(
int
x
=
0
;
x
<
ck
::
type_convert
<
int
>
(
arg
.
wei_k_c_y_x_
.
mDesc
.
GetLengths
()[
3
]
)
;
++
x
)
{
{
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
int
wi
=
wo
*
arg
.
conv_strides_
[
1
]
+
x
*
arg
.
conv_dilations_
[
1
]
-
arg
.
in_left_pads_
[
1
];
arg
.
in_left_pads_
[
1
];
if
(
hi
>=
0
&&
hi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
if
(
hi
>=
0
&&
hi
<
ck
::
type_convert
<
int
>
(
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
2
]
)
&&
wi
>=
0
&&
wi
<
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
wi
<
ck
::
type_convert
<
int
>
(
arg
.
in_n_c_hi_wi_
.
mDesc
.
GetLengths
()[
3
])
)
{
{
float
v_in
;
float
v_in
;
float
v_wei
;
float
v_wei
;
...
...
library/src/host_tensor/host_tensor.cpp
View file @
b411ee3b
...
@@ -25,7 +25,7 @@ std::size_t HostTensorDescriptor::GetElementSize() const
...
@@ -25,7 +25,7 @@ std::size_t HostTensorDescriptor::GetElementSize() const
std
::
size_t
HostTensorDescriptor
::
GetElementSpace
()
const
std
::
size_t
HostTensorDescriptor
::
GetElementSpace
()
const
{
{
std
::
size_t
space
=
1
;
std
::
size_t
space
=
1
;
for
(
int
i
=
0
;
i
<
mLens
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
mLens
.
size
()
)
;
++
i
)
{
{
space
+=
(
mLens
[
i
]
-
1
)
*
mStrides
[
i
];
space
+=
(
mLens
[
i
]
-
1
)
*
mStrides
[
i
];
}
}
...
@@ -68,7 +68,7 @@ void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream
...
@@ -68,7 +68,7 @@ void ostream_HostTensorDescriptor(const HostTensorDescriptor& desc, std::ostream
// FIXME: remove
// FIXME: remove
void
bf16_to_f32_
(
const
Tensor
<
ck
::
bhalf_t
>&
src
,
Tensor
<
float
>&
dst
)
void
bf16_to_f32_
(
const
Tensor
<
ck
::
bhalf_t
>&
src
,
Tensor
<
float
>&
dst
)
{
{
for
(
int
i
=
0
;
i
<
src
.
mData
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
src
.
mData
.
size
()
)
;
++
i
)
dst
.
mData
[
i
]
=
ck
::
type_convert
<
float
>
(
src
.
mData
[
i
]);
dst
.
mData
[
i
]
=
ck
::
type_convert
<
float
>
(
src
.
mData
[
i
]);
}
}
#endif
#endif
profiler/include/profile_convnd_bwd_data_impl.hpp
View file @
b411ee3b
...
@@ -222,7 +222,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
...
@@ -222,7 +222,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
{
{
float
max_diff
=
1e-6
;
float
max_diff
=
1e-6
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
ref
.
mData
.
size
()
)
;
++
i
)
{
{
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
if
(
max_diff
<
diff
)
if
(
max_diff
<
diff
)
...
@@ -236,16 +236,16 @@ template <typename DataType>
...
@@ -236,16 +236,16 @@ template <typename DataType>
void
show_data_nhwc_layout
(
Tensor
<
DataType
>&
nhwc
)
void
show_data_nhwc_layout
(
Tensor
<
DataType
>&
nhwc
)
{
{
std
::
cout
<<
"["
;
std
::
cout
<<
"["
;
for
(
int
n
=
0
;
n
<
nhwc
.
mDesc
.
GetLengths
()[
0
];
n
++
)
for
(
int
n
=
0
;
n
<
ck
::
type_convert
<
int
>
(
nhwc
.
mDesc
.
GetLengths
()[
0
]
)
;
n
++
)
{
{
std
::
cout
<<
"["
;
std
::
cout
<<
"["
;
for
(
int
hi
=
0
;
hi
<
nhwc
.
mDesc
.
GetLengths
()[
2
];
hi
++
)
for
(
int
hi
=
0
;
hi
<
ck
::
type_convert
<
int
>
(
nhwc
.
mDesc
.
GetLengths
()[
2
]
)
;
hi
++
)
{
{
std
::
cout
<<
"["
;
std
::
cout
<<
"["
;
for
(
int
wi
=
0
;
wi
<
nhwc
.
mDesc
.
GetLengths
()[
3
];
wi
++
)
for
(
int
wi
=
0
;
wi
<
ck
::
type_convert
<
int
>
(
nhwc
.
mDesc
.
GetLengths
()[
3
]
)
;
wi
++
)
{
{
std
::
cout
<<
"["
;
std
::
cout
<<
"["
;
for
(
int
c
=
0
;
c
<
nhwc
.
mDesc
.
GetLengths
()[
1
];
c
++
)
for
(
int
c
=
0
;
c
<
ck
::
type_convert
<
int
>
(
nhwc
.
mDesc
.
GetLengths
()[
1
]
)
;
c
++
)
{
{
std
::
cout
<<
static_cast
<
float
>
(
nhwc
(
n
,
c
,
hi
,
wi
))
<<
" "
;
std
::
cout
<<
static_cast
<
float
>
(
nhwc
(
n
,
c
,
hi
,
wi
))
<<
" "
;
}
}
...
...
profiler/include/profile_grouped_gemm_impl.hpp
View file @
b411ee3b
...
@@ -71,7 +71,7 @@ void profile_grouped_gemm_impl(int do_verification,
...
@@ -71,7 +71,7 @@ void profile_grouped_gemm_impl(int do_verification,
}
}
};
};
int
group_count
=
Ms
.
size
();
std
::
size_t
group_count
=
Ms
.
size
();
if
(
!
(
group_count
==
Ns
.
size
()
&&
group_count
==
Ks
.
size
()
&&
group_count
==
StrideAs
.
size
()
&&
if
(
!
(
group_count
==
Ns
.
size
()
&&
group_count
==
Ks
.
size
()
&&
group_count
==
StrideAs
.
size
()
&&
group_count
==
StrideBs
.
size
()
&&
group_count
==
StrideCs
.
size
()))
group_count
==
StrideBs
.
size
()
&&
group_count
==
StrideCs
.
size
()))
...
@@ -83,7 +83,7 @@ void profile_grouped_gemm_impl(int do_verification,
...
@@ -83,7 +83,7 @@ void profile_grouped_gemm_impl(int do_verification,
std
::
vector
<
Tensor
<
BDataType
>>
b_k_n
;
std
::
vector
<
Tensor
<
BDataType
>>
b_k_n
;
std
::
vector
<
Tensor
<
CDataType
>>
c_m_n_device_results
;
std
::
vector
<
Tensor
<
CDataType
>>
c_m_n_device_results
;
for
(
int
i
=
0
;
i
<
Ms
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
Ms
.
size
()
)
;
i
++
)
{
{
a_m_k
.
push_back
(
a_m_k
.
push_back
(
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
Ms
[
i
],
Ks
[
i
],
StrideAs
[
i
],
ALayout
{})));
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
Ms
[
i
],
Ks
[
i
],
StrideAs
[
i
],
ALayout
{})));
...
@@ -144,7 +144,7 @@ void profile_grouped_gemm_impl(int do_verification,
...
@@ -144,7 +144,7 @@ void profile_grouped_gemm_impl(int do_verification,
gemm_shapes
.
reserve
(
group_count
);
gemm_shapes
.
reserve
(
group_count
);
for
(
int
i
=
0
;
i
<
group_count
;
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
group_count
)
;
i
++
)
{
{
a_device_buf
.
emplace_back
(
a_device_buf
.
emplace_back
(
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_m_k
[
i
].
mDesc
.
GetElementSpace
()));
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_m_k
[
i
].
mDesc
.
GetElementSpace
()));
...
@@ -234,7 +234,7 @@ void profile_grouped_gemm_impl(int do_verification,
...
@@ -234,7 +234,7 @@ void profile_grouped_gemm_impl(int do_verification,
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
float
ave_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
nrepeat
);
std
::
size_t
flop
=
0
,
num_btype
=
0
;
std
::
size_t
flop
=
0
,
num_btype
=
0
;
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
flop
+=
std
::
size_t
(
2
)
*
Ms
[
i
]
*
Ns
[
i
]
*
Ks
[
i
];
flop
+=
std
::
size_t
(
2
)
*
Ms
[
i
]
*
Ns
[
i
]
*
Ks
[
i
];
...
@@ -258,7 +258,7 @@ void profile_grouped_gemm_impl(int do_verification,
...
@@ -258,7 +258,7 @@ void profile_grouped_gemm_impl(int do_verification,
if
(
do_verification
)
if
(
do_verification
)
{
{
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
c_device_buf
[
i
]
->
FromDevice
(
c_m_n_device_results
[
i
].
mData
.
data
());
c_device_buf
[
i
]
->
FromDevice
(
c_m_n_device_results
[
i
].
mData
.
data
());
...
...
profiler/src/profile_reduce.cpp
View file @
b411ee3b
...
@@ -186,7 +186,7 @@ class AppArgs
...
@@ -186,7 +186,7 @@ class AppArgs
int
processArgs
(
int
argc
,
char
*
argv
[])
int
processArgs
(
int
argc
,
char
*
argv
[])
{
{
unsigned
int
ch
;
int
ch
;
optind
++
;
// to skip the "reduce" module name
optind
++
;
// to skip the "reduce" module name
...
...
test/gemm_split_k/gemm_split_k.cpp
View file @
b411ee3b
...
@@ -45,7 +45,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
...
@@ -45,7 +45,7 @@ static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
{
{
float
max_diff
=
1e-6
;
float
max_diff
=
1e-6
;
for
(
int
i
=
0
;
i
<
ref
.
mData
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
ref
.
mData
.
size
()
)
;
++
i
)
{
{
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
float
diff
=
std
::
abs
(
double
(
ref
.
mData
[
i
])
-
double
(
result
.
mData
[
i
]));
if
(
max_diff
<
diff
)
if
(
max_diff
<
diff
)
...
...
test/grouped_gemm/grouped_gemm_fp16.cpp
View file @
b411ee3b
...
@@ -104,7 +104,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
...
@@ -104,7 +104,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
b_tensors_device
.
reserve
(
group_count
);
b_tensors_device
.
reserve
(
group_count
);
c_tensors_device
.
reserve
(
group_count
);
c_tensors_device
.
reserve
(
group_count
);
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
a_tensors
.
emplace_back
(
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
a_tensors
.
emplace_back
(
Tensor
<
ADataType
>
(
f_host_tensor_descriptor
(
gemm_shapes
[
i
].
M
,
gemm_shapes
[
i
].
K
,
gemm_shapes
[
i
].
StrideA
,
ALayout
{})));
gemm_shapes
[
i
].
M
,
gemm_shapes
[
i
].
K
,
gemm_shapes
[
i
].
StrideA
,
ALayout
{})));
...
@@ -119,7 +119,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
...
@@ -119,7 +119,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_2
<
BDataType
>
{
-
5
,
5
});
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_2
<
BDataType
>
{
-
5
,
5
});
}
}
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
a_tensors_device
.
emplace_back
(
a_tensors_device
.
emplace_back
(
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_tensors
[
i
].
mDesc
.
GetElementSize
()));
std
::
make_unique
<
DeviceMem
>
(
sizeof
(
ADataType
)
*
a_tensors
[
i
].
mDesc
.
GetElementSize
()));
...
@@ -147,7 +147,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
...
@@ -147,7 +147,7 @@ bool TestGroupedGemm(DeviceGroupedGemmPtr_& groupedGemmPtr)
invoker_ptr
->
Run
(
argument_ptr
.
get
());
invoker_ptr
->
Run
(
argument_ptr
.
get
());
for
(
int
i
=
0
;
i
<
gemm_shapes
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
gemm_shapes
.
size
()
)
;
i
++
)
{
{
c_tensors_device
[
i
]
->
FromDevice
(
c_device_tensors
[
i
].
mData
.
data
());
c_tensors_device
[
i
]
->
FromDevice
(
c_device_tensors
[
i
].
mData
.
data
());
...
...
test/reduce/reduce_no_index.cpp
View file @
b411ee3b
...
@@ -460,7 +460,7 @@ class SimpleAppArgs
...
@@ -460,7 +460,7 @@ class SimpleAppArgs
int
processArgs
(
int
argc
,
char
*
argv
[])
int
processArgs
(
int
argc
,
char
*
argv
[])
{
{
unsigned
int
ch
;
int
ch
;
while
(
1
)
while
(
1
)
{
{
...
...
test/reduce/reduce_util.hpp
View file @
b411ee3b
...
@@ -9,7 +9,7 @@ namespace reduce_util {
...
@@ -9,7 +9,7 @@ namespace reduce_util {
template
<
typename
T
>
template
<
typename
T
>
void
to_f32_vector
(
const
Tensor
<
T
>&
src
,
Tensor
<
float
>&
dst
)
void
to_f32_vector
(
const
Tensor
<
T
>&
src
,
Tensor
<
float
>&
dst
)
{
{
for
(
int
i
=
0
;
i
<
src
.
mData
.
size
();
++
i
)
for
(
int
i
=
0
;
i
<
ck
::
type_convert
<
int
>
(
src
.
mData
.
size
()
)
;
++
i
)
dst
.
mData
[
i
]
=
type_convert
<
float
>
(
src
.
mData
[
i
]);
dst
.
mData
[
i
]
=
type_convert
<
float
>
(
src
.
mData
[
i
]);
}
}
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment