Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5e627be5
Commit
5e627be5
authored
Jul 22, 2021
by
Jing Zhang
Browse files
debug
parent
78740e43
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
72 deletions
+29
-72
composable_kernel/include/driver/driver_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
...tion_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
+0
-2
composable_kernel/include/tensor_operation/gridwise_static_gemm_v2.hpp
...rnel/include/tensor_operation/gridwise_static_gemm_v2.hpp
+5
-55
composable_kernel/include/utility/amd_buffer_addressing_v2.hpp
...sable_kernel/include/utility/amd_buffer_addressing_v2.hpp
+6
-0
host/driver_offline/conv_fwd_driver_offline.cpp
host/driver_offline/conv_fwd_driver_offline.cpp
+18
-15
No files found.
composable_kernel/include/driver/driver_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
View file @
5e627be5
...
...
@@ -269,7 +269,6 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
index_t
nrepeat
=
100
;
#if 1
for
(
index_t
i
=
0
;
i
<
5
;
++
i
)
{
std
::
cout
<<
"Start running "
<<
nrepeat
<<
" times..."
<<
std
::
endl
;
...
...
@@ -376,7 +375,6 @@ struct DriverStaticConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
std
::
cout
<<
"Average time : "
<<
ave_time
<<
" ms, "
<<
perf
<<
" TFlop/s"
<<
std
::
endl
;
}
#endif
}
};
}
// namespace ck
...
...
composable_kernel/include/tensor_operation/gridwise_static_gemm_v2.hpp
View file @
5e627be5
...
...
@@ -51,7 +51,11 @@ struct GridwiseStaticGemm_km_kn_mn_v3
{
__host__
__device__
static
constexpr
index_t
GetSharedMemoryNumberOfByte
()
{
constexpr
auto
E
=
EPerBlock
*
3
*
3
;
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
a_e_k_global_desc
=
AGlobalDesc
{};
constexpr
auto
E
=
a_e_k_global_desc
.
GetLength
(
I0
);
constexpr
auto
max_lds_align
=
math
::
lcm
(
Number
<
ABlockTransferDstScalarPerVector_K
>
{},
Number
<
KPerBlock
>
{});
...
...
@@ -393,60 +397,6 @@ struct GridwiseStaticGemm_km_kn_mn_v3
integral_constant
<
bool
,
HasMainKBlockLoop
>
{},
integral_constant
<
bool
,
HasDoubleTailKBlockLoop
>
{});
}
#if 0
// pass tensor descriptors by their pointers
template <bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
__device__ void Run(const AGlobalDesc* p_a_e_k_global_desc,
const FloatAB* __restrict__ p_a_global,
const BGlobalDesc* p_b_e_n_ho_wo_global_desc,
const FloatAB* __restrict__ p_b_global,
const CGlobalDesc* p_c_k_n_ho_wo_global_desc,
FloatC* __restrict__ p_c_global,
integral_constant<bool, HasMainKBlockLoop>,
integral_constant<bool, HasDoubleTailKBlockLoop>) const
{
const auto a_e_k_global_desc = *p_a_e_k_global_desc;
const auto b_e_n_ho_wo_global_desc = *p_b_e_n_ho_wo_global_desc;
const auto c_k_n_ho_wo_global_desc = *p_c_k_n_ho_wo_global_desc;
Run(a_e_k_global_desc,
p_a_global,
b_e_n_ho_wo_global_desc,
p_b_global,
c_k_n_ho_wo_global_desc,
p_c_global,
integral_constant<bool, HasMainKBlockLoop>{},
integral_constant<bool, HasDoubleTailKBlockLoop>{});
}
// pass tensor descriptors by void*
template <bool HasMainKBlockLoop, bool HasDoubleTailKBlockLoop>
__device__ void Run(const void* p_a_e_k_global_desc,
const FloatAB* __restrict__ p_a_global,
const void* p_b_e_n_ho_wo_global_desc,
const FloatAB* __restrict__ p_b_global,
const void* p_c_k_n_ho_wo_global_desc,
FloatC* __restrict__ p_c_global,
integral_constant<bool, HasMainKBlockLoop>,
integral_constant<bool, HasDoubleTailKBlockLoop>) const
{
const auto a_e_k_global_desc = *reinterpret_cast<const AGlobalDesc*>(p_a_e_k_global_desc);
const auto b_e_n_ho_wo_global_desc =
*reinterpret_cast<const BGlobalDesc*>(p_b_e_n_ho_wo_global_desc);
const auto c_k_n_ho_wo_global_desc =
*reinterpret_cast<const CGlobalDesc*>(p_c_k_n_ho_wo_global_desc);
Run(a_e_k_global_desc,
p_a_global,
b_e_n_ho_wo_global_desc,
p_b_global,
c_k_n_ho_wo_global_desc,
p_c_global,
integral_constant<bool, HasMainKBlockLoop>{},
integral_constant<bool, HasDoubleTailKBlockLoop>{});
}
#endif
};
}
// namespace ck
...
...
composable_kernel/include/utility/amd_buffer_addressing_v2.hpp
View file @
5e627be5
...
...
@@ -567,6 +567,7 @@ __device__ void amd_buffer_store_impl_v2(const typename vector_type<T, N>::type
{
vector_type
<
half_t
,
8
>
tmp
{
src_thread_data
};
#if 0
__llvm_amdgcn_raw_buffer_store_fp16x4(tmp.AsType<half4_t>()[Number<0>{}],
dst_wave_buffer_resource,
dst_thread_addr_offset,
...
...
@@ -578,6 +579,11 @@ __device__ void amd_buffer_store_impl_v2(const typename vector_type<T, N>::type
dst_thread_addr_offset,
dst_wave_addr_offset + 4 * sizeof(half_t),
0);
#else
auto
tmp_
=
as_type
<
float4_t
>
(
tmp
);
__llvm_amdgcn_raw_buffer_store_fp32x4
(
tmp_
,
dst_wave_buffer_resource
,
dst_thread_addr_offset
,
dst_wave_addr_offset
,
0
);
#endif
}
}
}
...
...
host/driver_offline/conv_fwd_driver_offline.cpp
View file @
5e627be5
...
...
@@ -16,6 +16,7 @@
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r2_nhwc_kyxc_nhwk.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v6r1_nchw_kcyx_nkhw.hpp"
#include "device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nkhw.hpp"
#include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
...
...
@@ -104,7 +105,7 @@ int main(int argc, char* argv[])
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
Hi
=
54
0
;
constexpr
index_t
Hi
=
54
4
;
constexpr
index_t
Wi
=
960
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
3
;
...
...
@@ -380,10 +381,12 @@ int main(int argc, char* argv[])
const
auto
tmp
=
f_make_for_device_nchw
();
device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw
<
in_data_t
,
8
,
acc_data_t
,
out_data_t
>
(
tmp
[
I0
],
#if 1
device_static_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw
#else
device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw
#endif
<
in_data_t
,
8
,
acc_data_t
,
out_data_t
>
(
tmp
[
I0
],
tmp
[
I1
],
tmp
[
I2
],
tmp
[
I3
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment