Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
1d011fef
Commit
1d011fef
authored
Apr 01, 2021
by
root
Browse files
fix load/store
parent
3321471c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
13 deletions
+21
-13
composable_kernel/include/utility/amd_buffer_addressing_v2.hpp
...sable_kernel/include/utility/amd_buffer_addressing_v2.hpp
+18
-10
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+2
-2
script/cmake-rocm3.7.sh
script/cmake-rocm3.7.sh
+1
-1
No files found.
composable_kernel/include/utility/amd_buffer_addressing_v2.hpp
View file @
1d011fef
...
...
@@ -216,8 +216,11 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
0
>
{})
=
__llvm_amdgcn_raw_buffer_load_fp32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_fp32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
4
*
sizeof
(
float
),
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_fp32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
+
4
*
sizeof
(
float
),
0
);
return
tmp
.
Vector
();
}
...
...
@@ -265,8 +268,11 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
0
>
{})
=
__llvm_amdgcn_raw_buffer_load_fp16x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_fp16x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_fp16x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
+
4
*
sizeof
(
half_t
),
0
);
return
tmp
.
Vector
();
}
...
...
@@ -295,8 +301,11 @@ amd_buffer_load_impl_v2(int32x4_t src_wave_buffer_resource,
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
0
>
{})
=
__llvm_amdgcn_raw_buffer_load_i32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
,
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_i32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
4
*
sizeof
(
int32_t
),
0
);
tmp
.
Vectors
(
Number
<
4
>
{})(
Number
<
1
>
{})
=
__llvm_amdgcn_raw_buffer_load_i32x4
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
src_wave_addr_offset
+
4
*
sizeof
(
int32_t
),
0
);
return
tmp
.
Vector
();
}
...
...
@@ -457,19 +466,18 @@ __device__ void amd_buffer_store_impl_v2(const typename vector_type<T, N>::type
}
else
if
constexpr
(
N
==
8
)
{
vector_type
<
half_t
,
8
>
tmp
;
tmp
.
Vector
()
=
src_thread_data
;
vector_type
<
half_t
,
8
>
tmp
{
src_thread_data
};
__llvm_amdgcn_raw_buffer_store_fp16x4
(
tmp
.
Vectors
(
Number
<
4
>
{})[
Number
<
0
>
{}],
dst_wave_buffer_resource
,
dst_thread_addr_offset
,
dst_wave_addr_offset
,
0
);
__llvm_amdgcn_raw_buffer_store_fp16x4
(
tmp
.
Vectors
(
Number
<
4
>
{})[
Number
<
1
>
{}],
dst_wave_buffer_resource
,
dst_thread_addr_offset
,
dst_wave_addr_offset
,
dst_wave_addr_offset
+
4
*
sizeof
(
half_t
)
,
0
);
}
}
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
1d011fef
...
...
@@ -11,9 +11,9 @@
#define CK_DEVICE_BACKEND_AMD 1
// GPU ID
#define CK_AMD_GPU_GFX906
0
#define CK_AMD_GPU_GFX906
1
#define CK_AMD_GPU_GFX908 0
#define CK_AMD_GPU_GFX1030
1
#define CK_AMD_GPU_GFX1030
0
// HIP version
#ifndef CK_HIP_VERSION_FLAT
...
...
script/cmake-rocm3.7.sh
View file @
1d011fef
...
...
@@ -10,7 +10,7 @@ cmake
-D
CMAKE_INSTALL_PREFIX
=
${
MY_PROJECT_INSTALL
}
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
DEVICE_BACKEND
=
"AMD"
\
-D
CMAKE_CXX_FLAGS
=
"-O3 --amdgpu-target=gfx
1030
-gline-tables-only -save-temps=
$CWD
-ftemplate-backtrace-limit=0"
\
-D
CMAKE_CXX_FLAGS
=
"-O3 --amdgpu-target=gfx
906
-gline-tables-only -save-temps=
$CWD
-ftemplate-backtrace-limit=0"
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_PREFIX_PATH
=
"/opt/rocm"
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment