Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f9560180
"...composable_kernel_rocm.git" did not exist on "fb1ccfa9df534c8c9f351dd959a0ff692d6f9210"
Commit
f9560180
authored
Oct 27, 2021
by
Jing Zhang
Browse files
debugging maxpool
parent
b5bc31bd
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
15 additions
and
12 deletions
+15
-12
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2_add.hpp
...l/include/tensor_operation/gridwise_gemm_dlops_v2_add.hpp
+9
-7
composable_kernel/include/utility/amd_buffer_addressing.hpp
composable_kernel/include/utility/amd_buffer_addressing.hpp
+0
-1
host/driver_offline/include/device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
...ward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
+5
-3
host/driver_offline/src/conv_fwd_driver_offline_nchwc.cpp
host/driver_offline/src/conv_fwd_driver_offline_nchwc.cpp
+1
-1
No files found.
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2_add.hpp
View file @
f9560180
...
...
@@ -633,13 +633,15 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
auto
bias_global_buf
=
make_dynamic_buffer
<
AddressSpaceEnum_t
::
Global
>
(
p_bias_global
,
bias_k0_k1_grid_desc
.
GetElementSpaceSize
());
// if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0)
// printf("a: %d b: %d c: %d d: %d bias: %d\n",
//(int)a_e0_e1_k0_k1_e2_grid_desc.GetElementSpaceSize(),
//(int)b_e0_e1_n_h0_h1_h2_w0_w1_w2_e2_grid_desc.GetElementSpaceSize(),
//(int)c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.GetElementSpaceSize(),
//(int)d_k0_k1_n_h0_h1_hx_w0_w1_wx_grid_desc.GetElementSpaceSize(),
//(int)bias_k0_k1_grid_desc.GetElementSpaceSize());
#if 0
if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0)
printf("a: %d b: %d c: %d d: %d bias: %d\n",
(int)a_e0_e1_k0_k1_e2_grid_desc.GetElementSpaceSize(),
(int)b_e0_e1_n_h0_h1_h2_w0_w1_w2_e2_grid_desc.GetElementSpaceSize(),
(int)c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.GetElementSpaceSize(),
(int)d_k0_k1_n_h0_h1_hx_w0_w1_wx_grid_desc.GetElementSpaceSize(),
(int)bias_k0_k1_grid_desc.GetElementSpaceSize());
#endif
constexpr
auto
HasMainE1BlockLoop
=
CalculateHasMainE1BlockLoop
();
constexpr
auto
HasDoubleTailE1BlockLoop
=
CalculateHasDoubleTailE1BlockLoop
();
...
...
composable_kernel/include/utility/amd_buffer_addressing.hpp
View file @
f9560180
...
...
@@ -319,7 +319,6 @@ __device__ typename vector_type<T, N>::type amd_buffer_load_impl(int32x4_t src_w
}
else
if
constexpr
(
N
==
8
)
{
printf
(
"half8_t buffer_load
\n
"
);
// use fp32 load to mimic fp16 load
float4_t
tmp
=
llvm_amdgcn_raw_buffer_load_fp32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
...
...
host/driver_offline/include/device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp
View file @
f9560180
...
...
@@ -126,10 +126,12 @@ void device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1
using
ABlockTransferThreadClusterLengths_E0_E1_K0_K1_E2
=
Sequence
<
1
,
E1PerBlock
,
1
,
KPerBlock
,
1
>
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_E2
=
E2
;
constexpr
index_t
ABlockTransferDstScalarPerVector_E2
=
E2
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_E2
=
E2
;
constexpr
index_t
ABlockTransferDstScalarPerVector_E2
=
E2
;
constexpr
index_t
BThreadTransferSrcScalarPerVector_E2
=
E2
;
constexpr
index_t
CThreadTransferDstScalarPerVector_K
=
K1
;
constexpr
index_t
CThreadTransferDstScalarPerVector_K
=
K1
;
#endif
const
auto
in_n_c0_hi_wi_c1_desc
=
...
...
host/driver_offline/src/conv_fwd_driver_offline_nchwc.cpp
View file @
f9560180
...
...
@@ -111,7 +111,7 @@ int main(int argc, char* argv[])
constexpr
auto
Wi
=
Number
<
1920
>
{};
constexpr
auto
Y
=
Number
<
3
>
{};
constexpr
auto
X
=
Number
<
3
>
{};
constexpr
auto
C0
=
Number
<
3
>
{};
constexpr
auto
C0
=
Number
<
1
>
{};
constexpr
auto
C1
=
Number
<
4
>
{};
constexpr
auto
K0
=
Number
<
2
>
{};
constexpr
auto
K1
=
Number
<
8
>
{};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment