Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
d490a439
Commit
d490a439
authored
Apr 13, 2023
by
rocking
Browse files
Revise client example of conv 2d, use NHWGC layout
parent
fb678f40
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
43 deletions
+17
-43
client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp
client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp
+17
-43
No files found.
client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp
View file @
d490a439
...
@@ -17,16 +17,16 @@ using InDataType = ck::half_t;
...
@@ -17,16 +17,16 @@ using InDataType = ck::half_t;
using
WeiDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
G
NHWC
;
using
InLayout
=
ck
::
tensor_layout
::
convolution
::
NHW
G
C
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
G
NHWK
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
NHW
G
K
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
2
;
static
constexpr
ck
::
index_t
NumDimSpatial
=
2
;
static
constexpr
ck
::
index_t
G
=
32
;
static
constexpr
ck
::
index_t
G
=
32
;
static
constexpr
ck
::
index_t
N
=
256
;
static
constexpr
ck
::
index_t
N
=
256
;
static
constexpr
ck
::
index_t
K
=
192
;
static
constexpr
ck
::
index_t
K
=
64
;
static
constexpr
ck
::
index_t
C
=
19
2
;
static
constexpr
ck
::
index_t
C
=
3
2
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
Y
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
X
=
3
;
static
constexpr
ck
::
index_t
Hi
=
28
;
static
constexpr
ck
::
index_t
Hi
=
28
;
...
@@ -52,50 +52,24 @@ struct SimpleDeviceMem
...
@@ -52,50 +52,24 @@ struct SimpleDeviceMem
int
main
()
int
main
()
{
{
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
in_lengths
{
G
,
N
,
Hi
,
Wi
,
C
};
// The input/weight/output tensors are stored in memory as NHWGC/GKYXC/NHWGK.
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
in_strides
{
0
,
0
,
0
,
0
,
1
};
// However, CK's API only accepts lengths and strides in GNCHW/GKCYX/GNKHW order.
// Hence, the strides must be listed in that order while still describing the memory layout.
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
wei_lengths
{
G
,
K
,
Y
,
X
,
C
};
std
::
array
<
ck
::
index_t
,
5
>
in_lengths
{
G
,
N
,
C
,
Hi
,
Wi
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
wei_strides
{
0
,
0
,
0
,
0
,
1
};
std
::
array
<
ck
::
index_t
,
5
>
in_strides
{
C
,
Hi
*
Wi
*
G
*
C
,
1
,
Wi
*
G
*
C
,
G
*
C
};
std
::
array
<
ck
::
index_t
,
5
>
wei_lengths
{
G
,
K
,
C
,
Y
,
X
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
out_lengths
{
G
,
N
,
Ho
,
Wo
,
K
};
std
::
array
<
ck
::
index_t
,
5
>
wei_strides
{
K
*
Y
*
X
*
C
,
Y
*
X
*
C
,
1
,
X
*
C
,
C
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
3
>
out_strides
{
0
,
0
,
0
,
0
,
1
};
std
::
array
<
ck
::
index_t
,
5
>
out_lengths
{
G
,
N
,
K
,
Ho
,
Wo
};
std
::
array
<
ck
::
index_t
,
5
>
out_strides
{
C
,
Ho
*
Wo
*
G
*
C
,
1
,
Wo
*
G
*
C
,
G
*
C
};
std
::
partial_sum
(
rbegin
(
in_lengths
),
std
::
prev
(
rend
(
in_lengths
)),
std
::
next
(
rbegin
(
in_strides
)),
std
::
multiplies
<>
{});
std
::
partial_sum
(
rbegin
(
wei_lengths
),
std
::
prev
(
rend
(
wei_lengths
)),
std
::
next
(
rbegin
(
wei_strides
)),
std
::
multiplies
<>
{});
std
::
partial_sum
(
rbegin
(
out_lengths
),
std
::
prev
(
rend
(
out_lengths
)),
std
::
next
(
rbegin
(
out_strides
)),
std
::
multiplies
<>
{});
// transpose GNHWC/GKYXC/GNHWK to GNCHW/GKCYX/GNKHW
std
::
rotate
(
rbegin
(
in_lengths
),
std
::
next
(
rbegin
(
in_lengths
)),
std
::
next
(
rbegin
(
in_lengths
),
3
));
std
::
rotate
(
rbegin
(
in_strides
),
std
::
next
(
rbegin
(
in_strides
)),
std
::
next
(
rbegin
(
in_strides
),
3
));
std
::
rotate
(
rbegin
(
wei_lengths
),
std
::
next
(
rbegin
(
wei_lengths
)),
std
::
next
(
rbegin
(
wei_lengths
),
3
));
std
::
rotate
(
rbegin
(
wei_strides
),
std
::
next
(
rbegin
(
wei_strides
)),
std
::
next
(
rbegin
(
wei_strides
),
3
));
std
::
rotate
(
rbegin
(
out_lengths
),
std
::
next
(
rbegin
(
out_lengths
)),
std
::
next
(
rbegin
(
out_lengths
),
3
));
std
::
rotate
(
rbegin
(
out_strides
),
std
::
next
(
rbegin
(
out_strides
)),
std
::
next
(
rbegin
(
out_strides
),
3
));
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
filter_strides
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
filter_strides
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
filter_dilations
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
filter_dilations
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_left_pads
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_left_pads
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_right_pads
{
1
,
1
};
std
::
array
<
ck
::
index_t
,
NumDimSpatial
>
input_right_pads
{
1
,
1
};
SimpleDeviceMem
in
(
sizeof
(
InDataType
)
*
G
*
N
*
Hi
*
Wi
*
C
);
SimpleDeviceMem
in
(
sizeof
(
InDataType
)
*
N
*
Hi
*
Wi
*
G
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
wei
(
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
G
*
N
*
Ho
*
Wo
*
K
);
SimpleDeviceMem
out
(
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
);
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
InLayout
,
InLayout
,
...
@@ -155,9 +129,9 @@ int main()
...
@@ -155,9 +129,9 @@ int main()
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
true
});
float
avg_time
=
invoker_ptr
->
Run
(
argument_ptr
.
get
(),
StreamConfig
{
nullptr
,
true
});
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
G
*
N
*
K
*
C
*
Ho
*
Wo
*
Y
*
X
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
G
*
N
*
K
*
C
*
Ho
*
Wo
*
Y
*
X
;
std
::
size_t
num_bytes
=
sizeof
(
InDataType
)
*
G
*
N
*
Hi
*
Wi
*
C
+
std
::
size_t
num_bytes
=
sizeof
(
InDataType
)
*
N
*
Hi
*
Wi
*
G
*
C
+
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
+
sizeof
(
WeiDataType
)
*
G
*
K
*
Y
*
X
*
C
+
sizeof
(
OutDataType
)
*
G
*
N
*
Ho
*
Wo
*
K
;
sizeof
(
OutDataType
)
*
N
*
Ho
*
Wo
*
G
*
K
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
avg_time
;
float
gb_per_sec
=
num_bytes
/
1.E6
/
avg_time
;
float
gb_per_sec
=
num_bytes
/
1.E6
/
avg_time
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment