Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4f31669f
Commit
4f31669f
authored
Mar 05, 2021
by
Chao Liu
Browse files
add workaround for SWDEV-275126
parent
129cc2e3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
1 deletion
+111
-1
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
...e/tensor_description/dynamic_tensor_descriptor_helper.hpp
+26
-0
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+5
-0
composable_kernel/include/utility/container_helper.hpp
composable_kernel/include/utility/container_helper.hpp
+79
-0
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+1
-1
No files found.
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
View file @
4f31669f
...
@@ -15,6 +15,26 @@ namespace ck {
...
@@ -15,6 +15,26 @@ namespace ck {
* functions on GPU without worrying about scratch memory usage.
* functions on GPU without worrying about scratch memory usage.
*/
*/
#if CK_WORKAROUND_SWDEV_275126
/**
 * Recursive helper written as a free function template.
 *
 * Adds (lengths[i] - 1) * strides[i] to acc_old and keeps recursing with the
 * next index until the index reaches Lengths::Size() - 1.
 *
 * @param lengths  container of lengths, indexed with Number<>
 * @param strides  container of strides, indexed with Number<>
 * @param i        index of the entry folded in by this call
 * @param acc_old  value accumulated by the previous steps
 * @return acc_old plus the contributions of entries i .. Lengths::Size() - 1
 */
template <typename Lengths, typename Strides, index_t I, typename AccOld>
__host__ __device__ constexpr auto calculate_element_space_size_impl(const Lengths& lengths,
                                                                     const Strides& strides,
                                                                     Number<I> i,
                                                                     AccOld acc_old)
{
    // running total after folding in entry i
    auto acc = acc_old + (lengths[i] - Number<1>{}) * strides[i];

    if constexpr(i.value >= Lengths::Size() - 1)
    {
        // last entry handled: recursion terminates here
        return acc;
    }
    else
    {
        // must stay inside the else-branch so it is discarded at the last index
        return calculate_element_space_size_impl(lengths, strides, i + Number<1>{}, acc);
    }
}
#endif
template
<
typename
...
Lengths
,
template
<
typename
...
Lengths
,
typename
...
Strides
,
typename
...
Strides
,
typename
std
::
enable_if
<
sizeof
...(
Lengths
)
==
sizeof
...(
Strides
),
bool
>
::
type
=
false
>
typename
std
::
enable_if
<
sizeof
...(
Lengths
)
==
sizeof
...(
Strides
),
bool
>
::
type
=
false
>
...
@@ -33,6 +53,8 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
...
@@ -33,6 +53,8 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
#if !CK_WORKAROUND_SWDEV_275126
// rocm-4.1 compiler would crash for recursive lambda
// recursive function for reduction
// recursive function for reduction
auto
f
=
[
&
](
auto
fs
,
auto
i
,
auto
acc_old
)
{
auto
f
=
[
&
](
auto
fs
,
auto
i
,
auto
acc_old
)
{
auto
acc_new
=
acc_old
+
(
lengths
[
i
]
-
Number
<
1
>
{})
*
strides
[
i
];
auto
acc_new
=
acc_old
+
(
lengths
[
i
]
-
Number
<
1
>
{})
*
strides
[
i
];
...
@@ -48,6 +70,10 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
...
@@ -48,6 +70,10 @@ make_dynamic_naive_tensor_descriptor_v2(const Tuple<Lengths...>& lengths,
};
};
const
auto
element_space_size
=
f
(
f
,
Number
<
0
>
{},
Number
<
1
>
{});
const
auto
element_space_size
=
f
(
f
,
Number
<
0
>
{},
Number
<
1
>
{});
#else
const
auto
element_space_size
=
calculate_element_space_size_impl
(
lengths
,
strides
,
Number
<
0
>
{},
Number
<
1
>
{});
#endif
return
DynamicTensorDescriptor
<
remove_cv_t
<
decltype
(
transforms
)
>
,
return
DynamicTensorDescriptor
<
remove_cv_t
<
decltype
(
transforms
)
>
,
remove_cv_t
<
decltype
(
low_dim_hidden_idss
)
>
,
remove_cv_t
<
decltype
(
low_dim_hidden_idss
)
>
,
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
4f31669f
...
@@ -116,6 +116,11 @@
...
@@ -116,6 +116,11 @@
#define CK_WORKAROUND_SWDEV_241664 1
#define CK_WORKAROUND_SWDEV_241664 1
#endif
#endif
// workaround for compiler crash when compiling recursive lambda
#ifndef CK_WORKAROUND_SWDEV_275126
#define CK_WORKAROUND_SWDEV_275126 1
#endif
namespace ck {
namespace ck {
enum AddressSpace
enum AddressSpace
...
...
composable_kernel/include/utility/container_helper.hpp
View file @
4f31669f
...
@@ -97,6 +97,8 @@ __host__ __device__ constexpr auto container_reorder_given_old2new(Sequence<Is..
...
@@ -97,6 +97,8 @@ __host__ __device__ constexpr auto container_reorder_given_old2new(Sequence<Is..
return
container_reorder_give_new2old
(
old_seq
,
new2old
);
return
container_reorder_give_new2old
(
old_seq
,
new2old
);
}
}
#if !CK_WORKAROUND_SWDEV_275126
// rocm-4.1 compiler would crash for recursive lambda
template
<
typename
Container
,
template
<
typename
Container
,
typename
Reduce
,
typename
Reduce
,
typename
Init
,
typename
Init
,
...
@@ -131,6 +133,50 @@ __host__ __device__ constexpr auto container_reduce(const Container& x,
...
@@ -131,6 +133,50 @@ __host__ __device__ constexpr auto container_reduce(const Container& x,
// start recursion
// start recursion
return
f
(
f
,
Number
<
IBegin
>
{},
init
);
return
f
(
f
,
Number
<
IBegin
>
{},
init
);
}
}
#else
// Recursive step of a reduction over a container, written as a free function.
//   x      : container being reduced, indexed with Number<>
//   reduce : binary functor, invoked as reduce(x[i], r_old)
//   r_old  : reduction accumulated so far
//   i      : index of the element consumed by this call
//   IEnd   : recursion continues while i < IEnd - IStep
//   IStep  : increment applied to the index between steps
// Returns the accumulated reduction once the recursion stops.
template <typename Container,
          typename Reduce,
          typename ROld,
          index_t I,
          index_t IEnd,
          index_t IStep>
__host__ __device__ constexpr auto container_reduce_impl(
    const Container& x, Reduce reduce, ROld r_old, Number<I> i, Number<IEnd>, Number<IStep>)
{
    // fold element i into the running reduction
    auto r = reduce(x[i], r_old);

    if constexpr(i.value >= IEnd - IStep)
    {
        // no further index below IEnd: done
        return r;
    }
    else
    {
        // must stay inside the else-branch so it is discarded on the last step
        return container_reduce_impl(
            x, reduce, r, i + Number<IStep>{}, Number<IEnd>{}, Number<IStep>{});
    }
}
// rocm-4.1 compiler would crash for recursive lambda
/**
 * Reduce the elements x[IBegin], x[IBegin + IStep], ... of a container into a
 * single value, starting from init.
 *
 * @param x       container to reduce, indexed with Number<>
 * @param reduce  binary functor, invoked as reduce(element, accumulated)
 * @param init    initial value of the reduction
 * @tparam IBegin first index (default 0)
 * @tparam IEnd   end index (default Container::Size())
 * @tparam IStep  index increment (default 1)
 * @return the accumulated reduction; init itself when the range is empty
 *
 * (IEnd - IBegin) must be a multiple of IStep.
 */
template <typename Container,
          typename Reduce,
          typename Init,
          index_t IBegin = 0,
          index_t IEnd   = Container::Size(),
          index_t IStep  = 1>
__host__ __device__ constexpr auto container_reduce(const Container& x,
                                                    Reduce reduce,
                                                    Init init,
                                                    Number<IBegin> = Number<0>{},
                                                    Number<IEnd>   = Number<Container::Size()>{},
                                                    Number<IStep>  = Number<1>{})
{
    static_assert((IEnd - IBegin) % IStep == 0, "wrong!");

    if constexpr(IBegin == IEnd)
    {
        // Empty range (e.g. an empty container with the default bounds): the
        // recursive helper always consumes x[IBegin] first, which would read
        // past the range, so the reduction of nothing is just init.
        return init;
    }
    else
    {
        return container_reduce_impl(
            x, reduce, init, Number<IBegin>{}, Number<IEnd>{}, Number<IStep>{});
    }
}
#endif
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
...
@@ -169,6 +215,8 @@ container_reverse_exclusive_scan(const Array<TData, NSize>& x, Reduce f, TData i
...
@@ -169,6 +215,8 @@ container_reverse_exclusive_scan(const Array<TData, NSize>& x, Reduce f, TData i
return
y
;
return
y
;
}
}
#if !CK_WORKAROUND_SWDEV_275126
// rocm4.1 compiler would crash with recursive lambda
template
<
typename
...
Xs
,
typename
Reduce
,
typename
Init
>
template
<
typename
...
Xs
,
typename
Reduce
,
typename
Init
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
container_reverse_exclusive_scan
(
const
Tuple
<
Xs
...
>&
x
,
Reduce
reduce
,
Init
init
)
container_reverse_exclusive_scan
(
const
Tuple
<
Xs
...
>&
x
,
Reduce
reduce
,
Init
init
)
...
@@ -196,6 +244,37 @@ container_reverse_exclusive_scan(const Tuple<Xs...>& x, Reduce reduce, Init init
...
@@ -196,6 +244,37 @@ container_reverse_exclusive_scan(const Tuple<Xs...>& x, Reduce reduce, Init init
// start recursion
// start recursion
return
f
(
f
,
Number
<
NSize
-
1
>
{},
make_tuple
(
init
),
init
);
return
f
(
f
,
Number
<
NSize
-
1
>
{},
make_tuple
(
init
),
init
);
}
}
#else
// Recursive step of the reverse exclusive scan over a Tuple, written as a
// free function.
//   x      : input tuple, indexed with Number<>
//   reduce : binary functor, invoked as reduce(x[i], r_old)
//   i      : index of the element consumed by this call
//   y_old  : scan result built so far
//   r_old  : reduction accumulated so far
// Each step pushes the new reduction to the front of the scan result; the
// recursion stops once i is no longer greater than 1.
template <typename... Xs, typename Reduce, index_t I, typename YOld, typename ROld>
__host__ __device__ constexpr auto container_reverse_exclusive_scan_impl(
    const Tuple<Xs...>& x, Reduce reduce, Number<I> i, YOld y_old, ROld r_old)
{
    // fold element i into the running reduction
    auto r_next = reduce(x[i], r_old);

    // the new reduction becomes the front entry of the scan result
    auto y_next = container_push_front(y_old, r_next);

    if constexpr(i.value <= 1)
    {
        // last step: the scan result is complete
        return y_next;
    }
    else
    {
        // continue with the previous element; kept inside the else-branch so
        // it is discarded on the last step
        return container_reverse_exclusive_scan_impl(
            x, reduce, i - Number<1>{}, y_next, r_next);
    }
}
/**
 * Reverse exclusive scan over a Tuple.
 *
 * The last entry of the result is init; every earlier entry is the reduction
 * of all elements of x behind it, computed as reduce(x[i], accumulated).
 * The result has as many entries as x.
 *
 * @param x       input tuple
 * @param reduce  binary functor, invoked as reduce(element, accumulated)
 * @param init    initial value of the reduction (also the last result entry)
 * @return tuple holding the scan result
 *
 * NOTE(review): an empty tuple (sizeof...(Xs) == 0) is not handled here.
 */
template <typename... Xs, typename Reduce, typename Init>
__host__ __device__ constexpr auto
container_reverse_exclusive_scan(const Tuple<Xs...>& x, Reduce reduce, Init init)
{
    constexpr index_t NSize = sizeof...(Xs);

    if constexpr(NSize == 1)
    {
        // With a single element nothing precedes the last entry, so the scan
        // is just (init). Entering the recursive helper with index 0 would
        // still fold in x[0] and return a two-element tuple.
        return make_tuple(init);
    }
    else
    {
        return container_reverse_exclusive_scan_impl(
            x, reduce, Number<NSize - 1>{}, make_tuple(init), init);
    }
}
#endif
// TODO: update, like container_reverse_exclusive_scan, to deal with Tuple of Number<>
// TODO: update, like container_reverse_exclusive_scan, to deal with Tuple of Number<>
template
<
typename
...
Xs
,
typename
Reduce
,
typename
TData
>
template
<
typename
...
Xs
,
typename
Reduce
,
typename
TData
>
...
...
driver/src/conv_driver.cpp
View file @
4f31669f
...
@@ -701,7 +701,7 @@ int main(int argc, char* argv[])
...
@@ -701,7 +701,7 @@ int main(int argc, char* argv[])
LeftPads
{},
LeftPads
{},
RightPads
{},
RightPads
{},
nrepeat
);
nrepeat
);
#elif
1
#elif
0
device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk
(
in_nchw_desc
,
device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk
(
in_nchw_desc
,
in_nchw
,
in_nchw
,
wei_kcyx_desc
,
wei_kcyx_desc
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment