Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
de6f254d
"aten/git@developer.sourcefind.cn:OpenDAS/torch-cluster.git" did not exist on "cb0e5f634ae21513e10ff1252007d51df1a1adde"
Commit
de6f254d
authored
Jun 07, 2019
by
Chao Liu
Browse files
debugging
parent
109f1e90
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
3 deletions
+11
-3
src/include/gridwise_convolution_implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hip.hpp
...implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hip.hpp
+11
-3
No files found.
src/include/gridwise_convolution_implicit_gemm_v4_lds_double_buffer_nchw_kcyx_nkhw.hip.hpp
View file @
de6f254d
...
...
@@ -269,7 +269,7 @@ struct GridwiseConvolutionImplicitGemm_v4_lds_double_buffer_nchw_kcyx_nkhw
// choose GEMM implementation here
const
auto
run_blockwise_gemm
=
[
&
](
auto
...
Xs
)
{
#if
0
#if
1
return
blockwise_gemm
.
Run
(
Xs
...);
#else
return
blockwise_gemm
.
Run_asm
(
Xs
...);
...
...
@@ -441,8 +441,6 @@ struct GridwiseConvolutionImplicitGemm_v4_lds_double_buffer_nchw_kcyx_nkhw
out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc
.
GetStride
(
I7
)
*
Strides
{}.
Get
(
I1
)
>
{};
constexpr
auto
out_n0_n1_n2_k0_k1_k2_h_w_new_global_mem_desc
=
make_ConstantTensorDescriptor
(
out_lengths_new
,
out_strides_new
);
...
...
@@ -473,6 +471,15 @@ struct GridwiseConvolutionImplicitGemm_v4_lds_double_buffer_nchw_kcyx_nkhw
out_k_n1_b_n2_global_merged_desc
.
GetOffsetFromMultiIndex
(
k_thread_data_on_global
,
0
,
b_thread_data_on_global
,
0
);
#if 1
if
(
get_block_1d_id
()
==
0
&&
get_thread_local_1d_id
()
==
0
)
{
print_ConstantTensorDescriptor
(
"out_n0_n1_n2_k0_k1_k2_h_w_new_global_mem_desc"
,
out_n0_n1_n2_k0_k1_k2_h_w_new_global_mem_desc
);
print_ConstantTensorDescriptor
(
"out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc"
,
out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc
);
}
#endif
#if 0
threadwise_generic_tensor_slice_copy_v1(
out_n0_n1_n2_k0_k1_k2_h_w_thread_desc,
p_out_thread,
...
...
@@ -483,6 +490,7 @@ struct GridwiseConvolutionImplicitGemm_v4_lds_double_buffer_nchw_kcyx_nkhw
out_n0_n1_n2_k0_k1_k2_h_w_thread_desc.GetLengths(),
arithmetic_sequence_gen<0, 8, 1>::SeqType{},
Number<1>{});
#endif
}
}
};
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment