Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
69fea593
Commit
69fea593
authored
Sep 15, 2019
by
Chao Liu
Browse files
amd build
parent
940949d9
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
25 additions
and
20 deletions
+25
-20
composable_kernel/include/tensor_description/tensor_coordinate.hpp
...e_kernel/include/tensor_description/tensor_coordinate.hpp
+2
-2
composable_kernel/include/tensor_description/tensor_coordinate_v2.hpp
...ernel/include/tensor_description/tensor_coordinate_v2.hpp
+2
-2
composable_kernel/include/utility/config_amd.hpp.in
composable_kernel/include/utility/config_amd.hpp.in
+9
-2
composable_kernel/include/utility/tuple.hpp
composable_kernel/include/utility/tuple.hpp
+2
-4
driver/src/driver.cpp
driver/src/driver.cpp
+10
-10
No files found.
composable_kernel/include/tensor_description/tensor_coordinate.hpp
View file @
69fea593
composable_kernel/include/tensor_description/tensor_coordinate_v2.hpp
View file @
69fea593
composable_kernel/include/utility/config_amd.hpp.in
View file @
69fea593
...
...
@@ -13,13 +13,20 @@
namespace ck {
using unsigned_t = uint32_t;
using signed_t = int;
#if 0 // debug
using index_t = unsigned_t;
#else
using index_t = signed_t;
#endif
// For some reason, the HIP compiler needs these definitions to generate optimal load and store
// instructions
typedef float float2_t __attribute__((ext_vector_type(2)));
typedef float float4_t __attribute__((ext_vector_type(4)));
using index_t = uint32_t;
template <class T>
__device__ void fused_multiply_accumulate(T& d, const T& s0, const T& s1)
{
...
...
composable_kernel/include/utility/tuple.hpp
View file @
69fea593
...
...
@@ -51,11 +51,9 @@ struct TupleImpl;
template
<
index_t
...
Is
,
typename
...
Xs
>
struct
TupleImpl
<
Sequence
<
Is
...
>
,
Xs
...
>
:
TupleElement
<
TupleElementKey
<
Is
>
,
Xs
>
...
{
#if 1
__host__
__device__
explicit
constexpr
TupleImpl
()
:
TupleElement
<
TupleElementKey
<
Is
>
,
Xs
>
()...
{
}
#endif
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
TupleImpl
(
Ys
&&
...
ys
)
...
...
@@ -95,14 +93,14 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__
__device__
constexpr
const
auto
&
At
(
Number
<
I
>
)
const
{
static_assert
(
I
<
base
::
Size
(),
"wrong! out of range"
);
return
GetElementByKey
(
detail
::
TupleElementKey
<
I
>
{});
return
base
::
GetElementByKey
(
detail
::
TupleElementKey
<
I
>
{});
}
template
<
index_t
I
>
__host__
__device__
constexpr
auto
&
At
(
Number
<
I
>
)
{
static_assert
(
I
<
base
::
Size
(),
"wrong! out of range"
);
return
GetElementByKey
(
detail
::
TupleElementKey
<
I
>
{});
return
base
::
GetElementByKey
(
detail
::
TupleElementKey
<
I
>
{});
}
};
...
...
driver/src/driver.cpp
View file @
69fea593
...
...
@@ -74,20 +74,20 @@ int main(int argc, char* argv[])
{
using
namespace
ck
;
#if
0
constexpr index_t N =
3
2;
constexpr index_t C =
8
;
constexpr index_t HI = 1;
constexpr index_t WI = 1;
constexpr index_t K =
128
;
constexpr index_t Y = 1;
constexpr index_t X = 1;
#if
1
constexpr
index_t
N
=
2
56
;
constexpr
index_t
C
=
64
;
constexpr
index_t
HI
=
1
7
;
constexpr
index_t
WI
=
1
7
;
constexpr
index_t
K
=
256
;
constexpr
index_t
Y
=
1
7
;
constexpr
index_t
X
=
1
7
;
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using LeftPads = Sequence<
1
,
1
>;
using RightPads = Sequence<0,
0
>;
using
LeftPads
=
Sequence
<
0
,
3
>
;
using
RightPads
=
Sequence
<
0
,
3
>
;
#elif 1
// 3x3, 34x34
constexpr
index_t
N
=
64
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment