Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4543d17a
"...resnet50_tensorflow.git" did not exist on "1cbc234530ae1b0ee0ad8e9ffd177b87fe5c4071"
Commit
4543d17a
authored
Feb 19, 2019
by
Chao Liu
Browse files
refactor
parent
b2b622e8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
7 deletions
+6
-7
driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp
driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp
+0
-3
driver/driver.hip.cpp
driver/driver.hip.cpp
+1
-1
src/include/gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp
...idwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp
+5
-3
No files found.
driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp
View file @
4543d17a
...
...
@@ -227,9 +227,6 @@ void device_implicit_gemm_convolution_1_chwn_csrk_khwn(InDesc,
constexpr
unsigned
HoPerThread
=
1
;
constexpr
unsigned
WoPerThread
=
1
;
constexpr
unsigned
WeiBlockCopyThreadPerDim0
=
4
;
constexpr
unsigned
WeiBlockCopyThreadPerDim1
=
32
;
constexpr
unsigned
InBlockCopy_ThreadPerDimC
=
8
;
constexpr
unsigned
InBlockCopy_ThreadPerDimH
=
2
;
constexpr
unsigned
InBlockCopy_ThreadPerDimW
=
2
;
...
...
driver/driver.hip.cpp
View file @
4543d17a
...
...
@@ -491,7 +491,7 @@ int main(int argc, char* argv[])
constexpr
unsigned
HPad
=
1
;
constexpr
unsigned
WPad
=
1
;
#elif
1
#elif
0
// 1x1 filter, 28x28 image
constexpr
unsigned
N
=
16
;
constexpr
unsigned
C
=
256
;
...
...
src/include/gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp
View file @
4543d17a
...
...
@@ -94,8 +94,8 @@ gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn(const Float* const __restric
// tensor view of blockwise input and weight in LDS
// be careful of alignment
constexpr
auto
in_chwn_block_desc
=
make_ConstantTensorDescriptor
(
Sequence
<
CPerBlock
,
HiPerBlock
,
WiPerBlock
,
NPerBlock
>
{});
constexpr
auto
in_chwn_block_desc
=
make_ConstantTensorDescriptor_aligned
(
Sequence
<
CPerBlock
,
HiPerBlock
,
WiPerBlock
,
NPerBlock
>
{}
,
Number
<
InBlockCopyDataPerRead
>
{}
);
constexpr
auto
wei_ek_block_desc
=
make_ConstantTensorDescriptor_aligned
(
Sequence
<
CPerBlock
*
S
*
R
,
KPerBlock
>
{},
Number
<
WeiBlockCopyDataPerRead
>
{});
...
...
@@ -164,7 +164,9 @@ gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn(const Float* const __restric
HoPerThread
>
{};
// LDS: be careful of alignment
constexpr
unsigned
in_block_size
=
in_chwn_block_desc
.
GetElementSpace
();
constexpr
unsigned
in_block_size
=
in_chwn_block_desc
.
GetElementSpace
(
Number
<
InBlockCopyDataPerRead
>
{});
constexpr
unsigned
wei_block_size
=
wei_csrk_block_desc
.
GetElementSpace
(
Number
<
WeiBlockCopyDataPerRead
>
{});
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment