Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
381c34e6
Commit
381c34e6
authored
Jan 20, 2025
by
Juan Manuel Martinez Caamaño
Browse files
Fix compiler errors & clean the code
parent
38c8f7d6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
34 deletions
+32
-34
include/ck_tile/core/arch/amd_buffer_addressing.hpp
include/ck_tile/core/arch/amd_buffer_addressing.hpp
+32
-34
No files found.
include/ck_tile/core/arch/amd_buffer_addressing.hpp
View file @
381c34e6
...
@@ -12,7 +12,14 @@
...
@@ -12,7 +12,14 @@
#include "ck_tile/core/utility/bit_cast.hpp"
#include "ck_tile/core/utility/bit_cast.hpp"
#include "ck_tile/core/utility/functional.hpp"
#include "ck_tile/core/utility/functional.hpp"
// This attribute gives a hint to the compiler that a branch is likely to be taken.
// Then, the compiler should remove if possible the associated s_cbranch_execz branch that would
// have been generated.
#if __cplusplus >= 202002L
#define LIKELY(x) (x) [[likely]]
#else
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#endif
namespace
ck_tile
{
namespace
ck_tile
{
...
@@ -63,11 +70,12 @@ struct buffer_load;
...
@@ -63,11 +70,12 @@ struct buffer_load;
// TODO: strict aliasing rule seems fail when reinterpret_cast between vector type
// TODO: strict aliasing rule seems fail when reinterpret_cast between vector type
// (exp_vector_type(xxx))
// (exp_vector_type(xxx))
union
BR
CK_TILE_DEVICE
__amdgpu_buffer_rsrc_t
cast_to_amdgpu_buffer_rsrc_t
(
int32x4_t
res
)
{
{
int32x4_t
res
;
__amdgpu_buffer_rsrc_t
as_rsrc
=
__builtin_bit_cast
(
__amdgpu_buffer_rsrc_t
,
res
);
__amdgpu_buffer_rsrc_t
opaque
;
static_assert
(
sizeof
(
res
)
==
sizeof
(
as_rsrc
)
&&
"Size of buffer resource should match"
);
};
return
as_rsrc
;
}
template
<
bool
pre_nop
>
template
<
bool
pre_nop
>
struct
buffer_load
<
16
,
pre_nop
>
struct
buffer_load
<
16
,
pre_nop
>
...
@@ -82,10 +90,9 @@ struct buffer_load<16, pre_nop>
...
@@ -82,10 +90,9 @@ struct buffer_load<16, pre_nop>
bool_constant
<
pre_nop
>
=
{})
bool_constant
<
pre_nop
>
=
{})
{
{
static_assert
(
sizeof
(
T
)
==
16
);
static_assert
(
sizeof
(
T
)
==
16
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
16
,
T
>::
payload_t
;
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
16
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b128
(
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
cast_to_amdgpu_buffer_rsrc_t
(
res
),
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_load_b128
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -102,10 +109,9 @@ struct buffer_load<8, pre_nop>
...
@@ -102,10 +109,9 @@ struct buffer_load<8, pre_nop>
bool_constant
<
pre_nop
>
=
{})
bool_constant
<
pre_nop
>
=
{})
{
{
static_assert
(
sizeof
(
T
)
==
8
);
static_assert
(
sizeof
(
T
)
==
8
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
8
,
T
>::
payload_t
;
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
8
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b64
(
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
cast_to_amdgpu_buffer_rsrc_t
(
res
),
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_load_b64
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -122,10 +128,9 @@ struct buffer_load<4, pre_nop>
...
@@ -122,10 +128,9 @@ struct buffer_load<4, pre_nop>
bool_constant
<
pre_nop
>
=
{})
bool_constant
<
pre_nop
>
=
{})
{
{
static_assert
(
sizeof
(
T
)
==
4
);
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
4
,
T
>::
payload_t
;
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
4
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b32
(
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
cast_to_amdgpu_buffer_rsrc_t
(
res
),
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_load_b32
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -142,10 +147,9 @@ struct buffer_load<2, pre_nop>
...
@@ -142,10 +147,9 @@ struct buffer_load<2, pre_nop>
bool_constant
<
pre_nop
>
=
{})
bool_constant
<
pre_nop
>
=
{})
{
{
static_assert
(
sizeof
(
T
)
==
4
);
// subdword is buggy, use dword buf and convert manually
static_assert
(
sizeof
(
T
)
==
4
);
// subdword is buggy, use dword buf and convert manually
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
2
,
T
>::
payload_t
;
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
2
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b16
(
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
cast_to_amdgpu_buffer_rsrc_t
(
res
),
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_load_b16
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -162,10 +166,9 @@ struct buffer_load<1, pre_nop>
...
@@ -162,10 +166,9 @@ struct buffer_load<1, pre_nop>
bool_constant
<
pre_nop
>
=
{})
bool_constant
<
pre_nop
>
=
{})
{
{
static_assert
(
sizeof
(
T
)
==
4
);
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
1
,
T
>::
payload_t
;
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
1
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b16
(
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
cast_to_amdgpu_buffer_rsrc_t
(
res
),
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_load_b16
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -207,9 +210,8 @@ struct buffer_store<16>
...
@@ -207,9 +210,8 @@ struct buffer_store<16>
{
{
static_assert
(
sizeof
(
T
)
==
16
);
static_assert
(
sizeof
(
T
)
==
16
);
using
mbuf_t
=
fp32x4_t
;
using
mbuf_t
=
fp32x4_t
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b128
(
__builtin_amdgcn_raw_buffer_store_b128
(
static
_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
bit
_cast
<
mbuf_t
>
(
value
),
cast_to_amdgpu_buffer_rsrc_t
(
res
)
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -226,9 +228,8 @@ struct buffer_store<8>
...
@@ -226,9 +228,8 @@ struct buffer_store<8>
{
{
static_assert
(
sizeof
(
T
)
==
8
);
static_assert
(
sizeof
(
T
)
==
8
);
using
mbuf_t
=
fp32x2_t
;
using
mbuf_t
=
fp32x2_t
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b64
(
__builtin_amdgcn_raw_buffer_store_b64
(
__builtin_
bit_cast
(
mbuf_t
,
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
bit_cast
<
mbuf_t
>
(
value
),
cast_to_amdgpu_buffer_rsrc_t
(
res
)
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -245,9 +246,8 @@ struct buffer_store<4>
...
@@ -245,9 +246,8 @@ struct buffer_store<4>
{
{
static_assert
(
sizeof
(
T
)
==
4
);
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
float
;
using
mbuf_t
=
float
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b32
(
__builtin_amdgcn_raw_buffer_store_b32
(
static
_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
bit
_cast
<
mbuf_t
>
(
value
),
cast_to_amdgpu_buffer_rsrc_t
(
res
)
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -264,9 +264,8 @@ struct buffer_store<2>
...
@@ -264,9 +264,8 @@ struct buffer_store<2>
{
{
static_assert
(
sizeof
(
T
)
==
2
);
static_assert
(
sizeof
(
T
)
==
2
);
using
mbuf_t
=
short
;
using
mbuf_t
=
short
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b16
(
__builtin_amdgcn_raw_buffer_store_b16
(
__builtin_
bit_cast
(
mbuf_t
,
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
bit_cast
<
mbuf_t
>
(
value
),
cast_to_amdgpu_buffer_rsrc_t
(
res
)
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
@@ -283,9 +282,8 @@ struct buffer_store<1>
...
@@ -283,9 +282,8 @@ struct buffer_store<1>
{
{
static_assert
(
sizeof
(
T
)
==
4
);
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
float
;
using
mbuf_t
=
float
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b8
(
__builtin_amdgcn_raw_buffer_store_b8
(
static
_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
bit
_cast
<
mbuf_t
>
(
value
),
cast_to_amdgpu_buffer_rsrc_t
(
res
)
,
v_offset
,
s_offset
,
0
);
}
}
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment