Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c03945f0
"...composable_kernel.git" did not exist on "1f543bfa79de0687f9b6144b5dea10f4190c8892"
Commit
c03945f0
authored
Apr 09, 2021
by
Chao Liu
Browse files
revert changes to buffer addressing that is irrelavent to magic number division
parent
0dc0fa5c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
83 deletions
+11
-83
composable_kernel/include/utility/amd_buffer_addressing.hpp
composable_kernel/include/utility/amd_buffer_addressing.hpp
+0
-11
composable_kernel/include/utility/amd_buffer_addressing_v2.hpp
...sable_kernel/include/utility/amd_buffer_addressing_v2.hpp
+8
-68
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+3
-3
composable_kernel/include/utility/float_type.amd.hpp.in
composable_kernel/include/utility/float_type.amd.hpp.in
+0
-1
No files found.
composable_kernel/include/utility/amd_buffer_addressing.hpp
View file @
c03945f0
...
@@ -6,17 +6,6 @@
...
@@ -6,17 +6,6 @@
namespace
ck
{
namespace
ck
{
template
<
typename
T
>
union
BufferResource
{
// 128 bit SGPRs to supply buffer resource in buffer instructions
// https://rocm-documentation.readthedocs.io/en/latest/GCN_ISA_Manuals/testdocbook.html#vector-memory-buffer-instructions
int32x4_t
data
;
T
*
address
[
2
];
int32_t
range
[
4
];
int32_t
config
[
4
];
};
__device__
float
__llvm_amdgcn_buffer_load_f32
(
int32x4_t
srsrc
,
__device__
float
__llvm_amdgcn_buffer_load_f32
(
int32x4_t
srsrc
,
index_t
vindex
,
index_t
vindex
,
index_t
offset
,
index_t
offset
,
...
...
composable_kernel/include/utility/amd_buffer_addressing_v2.hpp
View file @
c03945f0
...
@@ -6,27 +6,27 @@
...
@@ -6,27 +6,27 @@
namespace
ck
{
namespace
ck
{
template
<
typename
T
>
template
<
typename
T
>
union
BufferResource
_v2
union
BufferResource
{
{
// 128 bit SGPRs to supply buffer resource in buffer instructions
// 128 bit SGPRs to supply buffer resource in buffer instructions
// https://rocm-documentation.readthedocs.io/en/latest/GCN_ISA_Manuals/testdocbook.html#vector-memory-buffer-instructions
// https://rocm-documentation.readthedocs.io/en/latest/GCN_ISA_Manuals/testdocbook.html#vector-memory-buffer-instructions
int32x4_t
data
;
int32x4_t
data
;
StaticallyIndexedArray
<
T
*
,
2
>
address
;
T
*
address
[
2
]
;
StaticallyIndexedArray
<
int32_t
,
4
>
range
;
int32_t
range
[
4
]
;
StaticallyIndexedArray
<
int32_t
,
4
>
config
;
int32_t
config
[
4
]
;
};
};
template
<
typename
T
>
template
<
typename
T
>
__device__
int32x4_t
make_wave_buffer_resource
(
T
*
p_wave
,
index_t
data_space_size
)
__device__
int32x4_t
make_wave_buffer_resource
(
T
*
p_wave
,
index_t
data_space_size
)
{
{
BufferResource
_v2
<
T
>
wave_buffer_resource
;
BufferResource
<
T
>
wave_buffer_resource
;
// wavewise base address (64 bit)
// wavewise base address (64 bit)
wave_buffer_resource
.
address
(
Number
<
0
>
{})
=
const_cast
<
remove_cv_t
<
T
>*>
(
p_wave
);
wave_buffer_resource
.
address
[
0
]
=
const_cast
<
remove_cv_t
<
T
>*>
(
p_wave
);
// wavewise range (32 bit)
// wavewise range (32 bit)
wave_buffer_resource
.
range
(
Number
<
2
>
{})
=
data_space_size
*
sizeof
(
T
);
wave_buffer_resource
.
range
[
2
]
=
data_space_size
*
sizeof
(
T
);
// wavewise setting (32 bit)
// wavewise setting (32 bit)
wave_buffer_resource
.
config
(
Number
<
3
>
{})
=
CK_BUFFER_RESOURCE_3RD_DWORD
;
wave_buffer_resource
.
config
[
3
]
=
CK_BUFFER_RESOURCE_3RD_DWORD
;
return
wave_buffer_resource
.
data
;
return
wave_buffer_resource
.
data
;
}
}
...
@@ -37,19 +37,6 @@ __llvm_amdgcn_raw_buffer_load_i8(int32x4_t srsrc,
...
@@ -37,19 +37,6 @@ __llvm_amdgcn_raw_buffer_load_i8(int32x4_t srsrc,
index_t
voffset
,
index_t
voffset
,
index_t
soffset
,
index_t
soffset
,
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.load.i8"
);
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.load.i8"
);
__device__
int8x2_t
__llvm_amdgcn_raw_buffer_load_i8x2
(
int32x4_t
srsrc
,
index_t
voffset
,
index_t
soffset
,
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.load.v2i8"
);
__device__
int8x4_t
__llvm_amdgcn_raw_buffer_load_i8x4
(
int32x4_t
srsrc
,
index_t
voffset
,
index_t
soffset
,
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.load.v4i8"
);
__device__
int16_t
__device__
int16_t
__llvm_amdgcn_raw_buffer_load_i16
(
int32x4_t
srsrc
,
__llvm_amdgcn_raw_buffer_load_i16
(
int32x4_t
srsrc
,
index_t
voffset
,
index_t
voffset
,
...
@@ -118,20 +105,6 @@ __llvm_amdgcn_raw_buffer_store_i8(int8_t vdata,
...
@@ -118,20 +105,6 @@ __llvm_amdgcn_raw_buffer_store_i8(int8_t vdata,
index_t
soffset
,
index_t
soffset
,
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.store.i8"
);
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.store.i8"
);
__device__
void
__llvm_amdgcn_raw_buffer_store_i8x2
(
int8x2_t
vdata
,
int32x4_t
rsrc
,
index_t
voffset
,
index_t
soffset
,
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.store.v2i8"
);
__device__
void
__llvm_amdgcn_raw_buffer_store_i8x4
(
int8x4_t
vdata
,
int32x4_t
rsrc
,
index_t
voffset
,
index_t
soffset
,
index_t
glc_slc
)
__asm
(
"llvm.amdgcn.raw.buffer.store.v4i8"
);
__device__
void
__device__
void
__llvm_amdgcn_raw_buffer_store_i16
(
int16_t
vdata
,
__llvm_amdgcn_raw_buffer_store_i16
(
int16_t
vdata
,
int32x4_t
rsrc
,
int32x4_t
rsrc
,
...
@@ -210,7 +183,6 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
...
@@ -210,7 +183,6 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
index_t
src_wave_addr_offset
)
index_t
src_wave_addr_offset
)
{
{
static_assert
((
is_same
<
T
,
float
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
))
||
static_assert
((
is_same
<
T
,
float
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
))
||
(
is_same
<
T
,
int8_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
||
N
==
8
))
||
(
is_same
<
T
,
half_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
))
||
(
is_same
<
T
,
half_t
>::
value
&&
(
N
==
1
||
N
==
2
||
N
==
4
))
||
(
is_same
<
T
,
half2_t
>::
value
&&
(
N
==
1
))
||
(
is_same
<
T
,
half2_t
>::
value
&&
(
N
==
1
))
||
(
is_same
<
T
,
half4_t
>::
value
&&
(
N
==
1
))
||
(
is_same
<
T
,
half4_t
>::
value
&&
(
N
==
1
))
||
...
@@ -334,38 +306,6 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
...
@@ -334,38 +306,6 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
src_thread_addr_offset
,
src_thread_addr_offset
,
src_wave_addr_offset
+
4
*
sizeof
(
int32_t
),
src_wave_addr_offset
+
4
*
sizeof
(
int32_t
),
0
);
0
);
return
tmp
.
Vector
();
}
}
else
if
constexpr
(
is_same
<
T
,
int8_t
>::
value
)
{
if
constexpr
(
N
==
1
)
{
return
__llvm_amdgcn_raw_buffer_load_i8
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
}
else
if
constexpr
(
N
==
2
)
{
return
__llvm_amdgcn_raw_buffer_load_i8x2
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
}
else
if
constexpr
(
N
==
4
)
{
return
__llvm_amdgcn_raw_buffer_load_i8x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
}
else
if
constexpr
(
N
==
8
)
{
vector_type
<
int8_t
,
8
>
tmp
;
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
0
>
{})
=
__llvm_amdgcn_raw_buffer_load_i8x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_i8x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
+
4
*
sizeof
(
int8_t
),
0
);
return
tmp
.
Vector
();
return
tmp
.
Vector
();
}
}
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
c03945f0
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
#define CK_DEVICE_BACKEND_AMD 1
#define CK_DEVICE_BACKEND_AMD 1
// GPU ID
// GPU ID
#if
1
#if
0
#define CK_AMD_GPU_GFX906 1
#define CK_AMD_GPU_GFX906 1
#elif 0
#elif 0
#define CK_AMD_GPU_GFX908 1
#define CK_AMD_GPU_GFX908 1
#elif
0
#elif
1
#define CK_AMD_GPU_GFX1030 1
#define CK_AMD_GPU_GFX1030 1
#endif
#endif
...
@@ -88,7 +88,7 @@
...
@@ -88,7 +88,7 @@
// experimental implementation
// experimental implementation
#ifndef CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
#ifndef CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
0
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK
1
#endif
#endif
#ifndef CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
#ifndef CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK
...
...
composable_kernel/include/utility/float_type.amd.hpp.in
View file @
c03945f0
...
@@ -437,7 +437,6 @@ struct vector_type<int8_t, 16>
...
@@ -437,7 +437,6 @@ struct vector_type<int8_t, 16>
// i8
// i8
// hack for int8x4_t, because compiler does not have native support for int8x4_t
// hack for int8x4_t, because compiler does not have native support for int8x4_t
// int8x4_t is defined as int32_t
// int8x4_t is defined as int32_t
using int8x2_t = typename vector_type<int8_t, 2>::type;
using int8x4_t = typename vector_type<int8_t, 4>::type;
using int8x4_t = typename vector_type<int8_t, 4>::type;
using int8x8_t = typename vector_type<int8_t, 8>::type;
using int8x8_t = typename vector_type<int8_t, 8>::type;
using int8x16_t = typename vector_type<int8_t, 16>::type;
using int8x16_t = typename vector_type<int8_t, 16>::type;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment