Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0cb8ba92
Commit
0cb8ba92
authored
Jul 13, 2022
by
Chao Liu
Browse files
update reference conv
parent
ba816e69
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
85 additions
and
48 deletions
+85
-48
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
+11
-11
example/26_contraction/contraction_scale_xdl_fp32.cpp
example/26_contraction/contraction_scale_xdl_fp32.cpp
+3
-3
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
...ary/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+71
-34
No files found.
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
View file @
0cb8ba92
...
...
@@ -26,7 +26,6 @@ using DeviceConvNDFwdInstance = ck::tensor_operation::device::DeviceConvNdFwdNwc
AccDataType
,
//
InElementOp
,
// Input Elementwise Operation
WeiElementOp
,
// Weights Elementwise Operation =
// ck::tensor_layout::convolution::NKHW,
OutElementOp
,
// Output Elementwise Operation
ConvFwdDefault
,
// ConvForwardSpecialization
NumDimSpatial
,
// NumDimSpatial
...
...
@@ -57,16 +56,17 @@ using DeviceConvNDFwdInstance = ck::tensor_operation::device::DeviceConvNdFwdNwc
1
>
;
// CThreadTransferDstScalarPerVector
template
<
ck
::
index_t
NumDimSpatial
>
using
ReferenceConvNDFwdInstance
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NumDimSpatial
,
InLayout
,
WeiLayout
,
OutLayout
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
>
;
using
ReferenceConvNDFwdInstance
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NumDimSpatial
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
>
;
int
main
(
int
argc
,
char
*
argv
[])
{
...
...
example/26_contraction/contraction_scale_xdl_fp32.cpp
View file @
0cb8ba92
...
...
@@ -260,16 +260,16 @@ int main(int argc, char* argv[])
e_ms_ns_lengths
=
{
M0
,
M1
,
N0
,
N1
};
e_ms_ns_strides
=
{
std
::
stoi
(
argv
[
22
]),
std
::
stoi
(
argv
[
23
]),
std
::
stoi
(
argv
[
2
4
]),
std
::
stoi
(
argv
[
2
5
])};
std
::
stoi
(
argv
[
18
]),
std
::
stoi
(
argv
[
19
]),
std
::
stoi
(
argv
[
2
0
]),
std
::
stoi
(
argv
[
2
1
])};
scale
=
std
::
stof
(
argv
[
2
6
]);
scale
=
std
::
stof
(
argv
[
2
2
]);
}
else
{
printf
(
"arg1: verification (0=no, 1=yes)
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
);
printf
(
"arg3: time kernel (0=no, 1=yes)
\n
"
);
printf
(
"arg4 to
7
: M0, M1, N0, N1, K0, K1
\n
"
);
printf
(
"arg4 to
9
: M0, M1, N0, N1, K0, K1
\n
"
);
printf
(
"arg10 to 13: Stride_A_M0, Stride_A_M1, Stride_A_K0, Stride_A_K1
\n
"
);
printf
(
"arg14 to 17: Stride_B_N0, Stride_B_N1, Stride_B_K0, Stride_B_K1
\n
"
);
printf
(
"arg18 to 21: Stride_E_M0, Stride_E_M1, Stride_E_N0, Stride_E_N1
\n
"
);
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
View file @
0cb8ba92
...
...
@@ -87,65 +87,63 @@ struct ReferenceConvFwd : public device::BaseOperator
{
using
Argument
=
ReferenceConvFwd
::
Argument
;
// FIXME: properly implement "TensorView" for doing transpose or refer to dimension by name
float
Run
(
const
Argument
&
arg
)
{
// tensor descriptor in NCHW/KXYC/NKHW dimensional order
HostTensorDescriptor
in_desc
=
arg
.
input_
.
mDesc
;
HostTensorDescriptor
wei_desc
=
arg
.
weight_
.
mDesc
;
HostTensorDescriptor
o
out_desc
=
arg
.
output_
.
mDesc
;
HostTensorDescriptor
in_desc
=
arg
.
input_
.
mDesc
;
HostTensorDescriptor
wei_desc
=
arg
.
weight_
.
mDesc
;
HostTensorDescriptor
out_desc
=
arg
.
output_
.
mDesc
;
// input
if
constexpr
(
is_same_v
<
InLayout
,
ck
::
tensor_layout
::
convolution
::
NWC
>
)
if
constexpr
(
is_same_v
<
InLayout
,
ck
::
tensor_layout
::
convolution
::
NWC
>
)
{
in_desc
=
transpose_host_tensor_descriptor_given_new2old
(
input_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
2
,
1
});
arg
.
input_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
2
,
1
});
}
else
if
constexpr
(
is_same_v
<
InLayout
,
ck
::
tensor_layout
::
convolution
::
NHWC
>
)
else
if
constexpr
(
is_same_v
<
InLayout
,
ck
::
tensor_layout
::
convolution
::
NHWC
>
)
{
in_desc
=
transpose_host_tensor_descriptor_given_new2old
(
input_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
arg
.
input_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
}
else
if
constexpr
(
is_same_v
<
InLayout
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>
)
else
if
constexpr
(
is_same_v
<
InLayout
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>
)
{
in_desc
=
transpose_host_tensor_descriptor_given_new2old
(
input_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
4
,
1
,
2
,
3
});
arg
.
input_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
4
,
1
,
2
,
3
});
}
// weight
if
constexpr
(
is_same_v
<
WeiLayout
,
ck
::
tensor_layout
::
convolution
::
KXC
>
)
{
wei_desc
=
transpose_host_tensor_descriptor_given_new2old
(
weight_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
2
,
1
});
arg
.
weight_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
2
,
1
});
}
if
constexpr
(
is_same_v
<
WeiLayout
,
ck
::
tensor_layout
::
convolution
::
KXC
>
)
else
if
constexpr
(
is_same_v
<
WeiLayout
,
ck
::
tensor_layout
::
convolution
::
K
Y
XC
>
)
{
wei_desc
=
transpose_host_tensor_descriptor_given_new2old
(
weight_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
arg
.
weight_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
}
else
if
constexpr
(
NumDimSpatial
==
2
&&
WeiLayout
==
ck
::
tensor_layout
::
convolution
::
KYXC
)
else
if
constexpr
(
is_same_v
<
WeiLayout
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>
)
{
wei_desc
=
transpose_host_tensor_descriptor_given_new2old
(
weight_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
arg
.
weight_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
4
,
1
,
2
,
3
});
}
// output
if
constexpr
(
NumDimSpatial
==
1
&&
OutLayout
==
ck
::
tensor_layout
::
convolution
::
NWK
)
if
constexpr
(
is_same_v
<
OutLayout
,
ck
::
tensor_layout
::
convolution
::
NWK
>
)
{
out_desc
=
transpose_host_tensor_descriptor_given_new2old
(
output_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
2
,
1
});
arg
.
output_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
2
,
1
});
}
else
if
constexpr
(
NumDimSpatial
==
2
&&
OutLayout
==
ck
::
tensor_layout
::
convolution
::
NHWK
)
else
if
constexpr
(
is_same_v
<
OutLayout
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
)
{
out_desc
=
transpose_host_tensor_descriptor_given_new2old
(
output_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
arg
.
output_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
3
,
1
,
2
});
}
else
if
constexpr
(
NumDimSpatial
==
3
&&
OutLayout
==
ck
::
tensor_layout
::
convolution
::
NDHWK
)
else
if
constexpr
(
is_same_v
<
OutLayout
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>
)
{
out_desc
=
transpose_host_tensor_descriptor_given_new2old
(
output_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
4
,
1
,
2
,
3
});
arg
.
output_
.
mDesc
,
std
::
vector
<
std
::
size_t
>
{
0
,
4
,
1
,
2
,
3
});
}
if
constexpr
(
NumDimSpatial
==
1
)
...
...
@@ -161,16 +159,26 @@ struct ReferenceConvFwd : public device::BaseOperator
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
if
(
wi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
wi
)
<
in_desc
.
GetLengths
()[
2
])
{
float
v_in
;
float
v_wei
;
arg
.
in_element_op_
(
v_in
,
ck
::
type_convert
<
float
>
(
arg
.
input_
(
n
,
c
,
wi
)));
arg
.
wei_element_op_
(
v_wei
,
ck
::
type_convert
<
float
>
(
arg
.
weight_
(
k
,
c
,
x
)));
// FIXME hacky
arg
.
in_element_op_
(
v_in
,
ck
::
type_convert
<
float
>
(
arg
.
input_
.
mData
[
in_desc
.
GetOffsetFromMultiIndex
(
n
,
c
,
wi
)]));
// FIXME hacky
arg
.
wei_element_op_
(
v_wei
,
ck
::
type_convert
<
float
>
(
arg
.
weight_
.
mData
[
wei_desc
.
GetOffsetFromMultiIndex
(
k
,
c
,
x
)]));
v_acc
+=
v_in
*
v_wei
;
}
...
...
@@ -180,7 +188,10 @@ struct ReferenceConvFwd : public device::BaseOperator
float
v_out
;
arg
.
out_element_op_
(
v_out
,
v_acc
);
arg
.
output_
(
n
,
k
,
wo
)
=
ck
::
type_convert
<
OutDataType
>
(
v_out
);
// FIXME hacky
arg
.
output_
.
mData
[
out_desc
.
GetOffsetFromMultiIndex
({
n
,
k
,
wo
})]
=
ck
::
type_convert
<
OutDataType
>
(
v_out
);
};
make_ParallelTensorFunctor
(
f_ncw
,
...
...
@@ -204,12 +215,14 @@ struct ReferenceConvFwd : public device::BaseOperator
ck
::
type_convert
<
ck
::
long_index_t
>
(
ho
*
arg
.
conv_strides_
[
0
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
y
*
arg
.
conv_dilations_
[
0
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
0
]);
for
(
std
::
size_t
x
=
0
;
x
<
wei_desc
.
GetLengths
()[
3
];
++
x
)
{
auto
wi
=
ck
::
type_convert
<
ck
::
long_index_t
>
(
wo
*
arg
.
conv_strides_
[
1
])
+
ck
::
type_convert
<
ck
::
long_index_t
>
(
x
*
arg
.
conv_dilations_
[
1
])
-
ck
::
type_convert
<
ck
::
long_index_t
>
(
arg
.
in_left_pads_
[
1
]);
if
(
hi
>=
0
&&
ck
::
type_convert
<
std
::
size_t
>
(
hi
)
<
in_desc
.
GetLengths
()[
2
]
&&
wi
>=
0
&&
...
...
@@ -218,10 +231,20 @@ struct ReferenceConvFwd : public device::BaseOperator
float
v_in
;
float
v_wei
;
// FIXME hacky
arg
.
in_element_op_
(
v_in
,
ck
::
type_convert
<
float
>
(
arg
.
input_
(
n
,
c
,
hi
,
wi
)));
v_in
,
ck
::
type_convert
<
float
>
(
arg
.
input_
.
mData
[
in_desc
.
GetOffsetFromMultiIndex
(
n
,
c
,
hi
,
wi
)]));
// FIXME hacky
arg
.
wei_element_op_
(
v_wei
,
ck
::
type_convert
<
float
>
(
arg
.
weight_
(
k
,
c
,
y
,
x
)));
v_wei
,
ck
::
type_convert
<
float
>
(
arg
.
weight_
.
mData
[
wei_desc
.
GetOffsetFromMultiIndex
(
k
,
c
,
y
,
x
)]));
v_acc
+=
v_in
*
v_wei
;
}
}
...
...
@@ -231,7 +254,10 @@ struct ReferenceConvFwd : public device::BaseOperator
float
v_out
;
arg
.
out_element_op_
(
v_out
,
v_acc
);
arg
.
output_
(
n
,
k
,
ho
,
wo
)
=
ck
::
type_convert
<
OutDataType
>
(
v_out
);
// FIXME hacky
arg
.
output_
.
mData
[
out_desc
.
GetOffsetFromMultiIndex
({
n
,
k
,
ho
,
wo
})]
=
ck
::
type_convert
<
OutDataType
>
(
v_out
);
};
make_ParallelTensorFunctor
(
f_nchw
,
...
...
@@ -282,12 +308,20 @@ struct ReferenceConvFwd : public device::BaseOperator
float
v_in
;
float
v_wei
;
// FIXME hacky
arg
.
in_element_op_
(
v_in
,
ck
::
type_convert
<
float
>
(
arg
.
input_
(
n
,
c
,
di
,
hi
,
wi
)));
ck
::
type_convert
<
float
>
(
arg
.
input_
.
mData
[
in_desc
.
GetOffsetFromMultiIndex
(
n
,
c
,
di
,
hi
,
wi
)]));
// FIXME hacky
arg
.
wei_element_op_
(
v_wei
,
ck
::
type_convert
<
float
>
(
arg
.
weight_
(
k
,
c
,
z
,
y
,
x
)));
ck
::
type_convert
<
float
>
(
arg
.
weight_
.
mData
[
wei_desc
.
GetOffsetFromMultiIndex
(
k
,
c
,
z
,
y
,
x
)]));
v_acc
+=
v_in
*
v_wei
;
}
}
...
...
@@ -298,7 +332,10 @@ struct ReferenceConvFwd : public device::BaseOperator
float
v_out
;
arg
.
out_element_op_
(
v_out
,
v_acc
);
arg
.
output_
(
n
,
k
,
d_o
,
ho
,
wo
)
=
ck
::
type_convert
<
OutDataType
>
(
v_out
);
// FIXME hacky
arg
.
output_
.
mData
[
out_desc
.
GetOffsetFromMultiIndex
({
n
,
k
,
d_o
,
ho
,
wo
})]
=
ck
::
type_convert
<
OutDataType
>
(
v_out
);
};
make_ParallelTensorFunctor
(
f_nchw
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment