Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
38c8f7d6
Commit
38c8f7d6
authored
Jan 16, 2025
by
illsilin
Browse files
fix clang format
parent
ae7cef7c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
34 additions
and
29 deletions
+34
-29
include/ck_tile/core/arch/amd_buffer_addressing.hpp
include/ck_tile/core/arch/amd_buffer_addressing.hpp
+31
-26
include/ck_tile/ops/flatmm/block/uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16.inc
.../block/uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16.inc
+1
-1
include/ck_tile/ops/flatmm/block/uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16_itl.inc
...ck/uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16_itl.inc
+1
-1
include/ck_tile/ops/flatmm/block/uk/flatmm_uk_gfx9_32x512x128_1x1x1_16x16x16.inc
...tmm/block/uk/flatmm_uk_gfx9_32x512x128_1x1x1_16x16x16.inc
+1
-1
No files found.
include/ck_tile/core/arch/amd_buffer_addressing.hpp
View file @
38c8f7d6
...
...
@@ -63,7 +63,8 @@ struct buffer_load;
// TODO: strict aliasing rule seems fail when reinterpret_cast between vector type
// (exp_vector_type(xxx))
union
BR
{
union
BR
{
int32x4_t
res
;
__amdgpu_buffer_rsrc_t
opaque
;
};
...
...
@@ -83,7 +84,8 @@ struct buffer_load<16, pre_nop>
static_assert
(
sizeof
(
T
)
==
16
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
16
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b128
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b128
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -102,7 +104,8 @@ struct buffer_load<8, pre_nop>
static_assert
(
sizeof
(
T
)
==
8
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
8
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b64
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b64
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -121,7 +124,8 @@ struct buffer_load<4, pre_nop>
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
4
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b32
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b32
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -140,7 +144,8 @@ struct buffer_load<2, pre_nop>
static_assert
(
sizeof
(
T
)
==
4
);
// subdword is buggy, use dword buf and convert manually
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
2
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b16
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b16
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -159,12 +164,14 @@ struct buffer_load<1, pre_nop>
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
typename
impl
::
buffer_load_trait
<
1
,
T
>::
payload_t
;
const
BR
br
{
res
};
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b16
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
reinterpret_cast
<
mbuf_t
&>
(
value
)
=
__builtin_amdgcn_raw_buffer_load_b16
(
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
template
<
index_t
bytes
,
bool
pre_nop
=
false
>
struct
buffer_load_if
{
struct
buffer_load_if
{
template
<
typename
T
>
CK_TILE_DEVICE
void
operator
()(
T
&
value
,
int32x4_t
res
/*buffer resource*/
,
...
...
@@ -175,14 +182,10 @@ struct buffer_load_if {
bool_constant
<
pre_nop
>
=
{})
{
static_assert
(
sizeof
(
T
)
==
16
);
if
LIKELY
(
1
<=
flag
)
{
buffer_load
<
bytes
,
pre_nop
>
{}(
value
,
res
,
v_offset
,
s_offset
,
i_offset
,
flag
,
bool_constant
<
pre_nop
>
{});
if
LIKELY
(
1
<=
flag
)
{
buffer_load
<
bytes
,
pre_nop
>
{}(
value
,
res
,
v_offset
,
s_offset
,
i_offset
,
flag
,
bool_constant
<
pre_nop
>
{});
}
}
};
...
...
@@ -205,7 +208,8 @@ struct buffer_store<16>
static_assert
(
sizeof
(
T
)
==
16
);
using
mbuf_t
=
fp32x4_t
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b128
(
static_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_store_b128
(
static_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -223,7 +227,8 @@ struct buffer_store<8>
static_assert
(
sizeof
(
T
)
==
8
);
using
mbuf_t
=
fp32x2_t
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b64
(
__builtin_bit_cast
(
mbuf_t
,
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_store_b64
(
__builtin_bit_cast
(
mbuf_t
,
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -241,7 +246,8 @@ struct buffer_store<4>
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
float
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b32
(
static_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_store_b32
(
static_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -259,7 +265,8 @@ struct buffer_store<2>
static_assert
(
sizeof
(
T
)
==
2
);
using
mbuf_t
=
short
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b16
(
__builtin_bit_cast
(
mbuf_t
,
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_store_b16
(
__builtin_bit_cast
(
mbuf_t
,
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -277,7 +284,8 @@ struct buffer_store<1>
static_assert
(
sizeof
(
T
)
==
4
);
using
mbuf_t
=
float
;
const
BR
br
{
res
};
__builtin_amdgcn_raw_buffer_store_b8
(
static_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
__builtin_amdgcn_raw_buffer_store_b8
(
static_cast
<
mbuf_t
>
(
value
),
br
.
opaque
,
v_offset
,
s_offset
,
0
);
}
};
...
...
@@ -292,12 +300,9 @@ struct buffer_store_if
index_t
i_offset
/*max 0xFFF*/
,
index_t
flag
=
1
)
{
if
LIKELY
(
1
<=
flag
)
{
buffer_store
<
bytes
>
{}(
value
,
res
,
v_offset
,
s_offset
,
i_offset
);
if
LIKELY
(
1
<=
flag
)
{
buffer_store
<
bytes
>
{}(
value
,
res
,
v_offset
,
s_offset
,
i_offset
);
}
}
};
...
...
include/ck_tile/ops/flatmm/block/uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16.inc
View file @
38c8f7d6
...
...
@@ -824,4 +824,4 @@
#undef _UK_PK_CVT_
#undef _UK_ATOMIC_ADD_
#undef CK_TILE_FLATMM_UK_MFMA
// clang-format on
// clang-format on
include/ck_tile/ops/flatmm/block/uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16_itl.inc
View file @
38c8f7d6
...
...
@@ -722,4 +722,4 @@
#undef _UK_PK_CVT_
#undef _UK_ATOMIC_ADD_
#undef CK_TILE_FLATMM_UK_MFMA
// clang-format on
// clang-format on
include/ck_tile/ops/flatmm/block/uk/flatmm_uk_gfx9_32x512x128_1x1x1_16x16x16.inc
View file @
38c8f7d6
...
...
@@ -771,4 +771,4 @@
#undef _UK_MFMA_
#undef CK_TILE_FLATMM_UK_2B
#undef CK_TILE_FLATMM_UK_MFMA
// clang-format on
// clang-format on
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment