Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
e64a79c5
Commit
e64a79c5
authored
Dec 12, 2020
by
Jing Zhang
Browse files
clean code
parent
7bbcd0fe
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
67 deletions
+11
-67
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
...sor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
+0
-5
composable_kernel/include/utility/float_type.amd.hpp.in
composable_kernel/include/utility/float_type.amd.hpp.in
+7
-58
script/hipclang_opt.sh
script/hipclang_opt.sh
+4
-4
No files found.
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
View file @
e64a79c5
...
...
@@ -55,8 +55,6 @@ struct ThreadwiseGenericTensorSliceCopy_v5
"wrong! cannot evenly divide"
);
static_assert
(
ThreadBufferSize
==
4
,
""
);
// TODO:: sanity-check if vectorized memory read/write is allowed on src and dst
}
__device__
constexpr
ThreadwiseGenericTensorSliceCopy_v5
()
...
...
@@ -147,9 +145,6 @@ struct ThreadwiseGenericTensorSliceCopy_v5
constexpr
auto
buff_off
=
ThreadBufferDesc
::
CalculateOffset
(
to_multi_index
(
long_vector_data_begin_id
));
// static_assert(buff_off == 0 || buff_off == 1 || buff_off == 2 || buff_off == 3,
// "");
thread_buff
.
s1
(
Number
<
buff_off
>
{})
=
src_buff
;
});
}
...
...
composable_kernel/include/utility/float_type.amd.hpp.in
View file @
e64a79c5
...
...
@@ -24,76 +24,25 @@ typedef ushort ushort8_t __attribute__((ext_vector_type(8)));
union float_vec2_t
{
Tuple
<float,
float
> s1;
StaticallyIndexedArray
<float,
2
> s1;
float2_t s2;
__host__ __device__ constexpr float_vec2_t() {}
__host__ __device__ constexpr float_vec2_t() {
s2 = {0, 0};
}
};
union float_vec4_t
{
Tuple<float, float, float, float> s1;
struct{
float e0, e1, e2, e3;
} ss1;
StaticallyIndexedArray<float, 4> s1;
float4_t s4;
float n[4];
__host__ __device__ constexpr float_vec4_t() {}
template<typename T, index_t i>
__host__ __device__ void set(const T val);
template<>
__host__ __device__ void set<float, 0>(const float val)
{
ss1.e0 = val;
}
template<>
__host__ __device__ void set<float, 1>(const float val)
{
ss1.e1 = val;
}
template<>
__host__ __device__ void set<float, 2>(const float val)
{
ss1.e2 = val;
}
template<>
__host__ __device__ void set<float, 3>(const float val)
{
ss1.e3 = val;
}
__host__ __device__ constexpr float_vec4_t() {s4 = {0, 0, 0, 0};}
};
union float_vec8_t
{
Tuple<float, float, float, float, float, float, float, float> s1;
Tuple<float_vec2_t, float_vec2_t, float_vec2_t, float_vec2_t> s2;
struct{
float_vec4_t e0;
float_vec4_t e1;
} ss4;
Tuple<float_vec4_t, float_vec4_t> s4;
StaticallyIndexedArray<float, 8> s1;
StaticallyIndexedArray<float_vec2_t, 4> s2;
StaticallyIndexedArray<float_vec4_t, 2> s4;
float8_t s8;
float n[8];
__host__ __device__ constexpr float_vec8_t() {}
template<typename T, index_t i>
__host__ __device__ void set(const T val);
template<>
__host__ __device__ void set<float_vec4_t, 0>(const float_vec4_t val)
{
ss4.e0 = val;
}
template<>
__host__ __device__ void set<float_vec4_t, 1>(const float_vec4_t val)
{
ss4.e1 = val;
}
};
...
...
script/hipclang_opt.sh
View file @
e64a79c5
...
...
@@ -7,10 +7,10 @@ BC_FILE=$1
/opt/rocm/llvm/bin/opt
-S
-sroa
inline.ll
>
sora.ll
/opt/rocm/llvm/bin/opt
-S
-O3
sora.ll
>
o3.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
6
original.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
6
inline.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
6
sora.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
6
o3.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
8
original.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
8
inline.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
8
sora.ll
/opt/rocm/llvm/bin/llc
-mcpu
=
gfx90
8
o3.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa inline.ll > o3.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa o3.ll > o3_2.ll
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment