Commit e64a79c5 authored by Jing Zhang
Browse files

clean code

parent 7bbcd0fe
...@@ -55,8 +55,6 @@ struct ThreadwiseGenericTensorSliceCopy_v5 ...@@ -55,8 +55,6 @@ struct ThreadwiseGenericTensorSliceCopy_v5
"wrong! cannot evenly divide"); "wrong! cannot evenly divide");
static_assert(ThreadBufferSize == 4, ""); static_assert(ThreadBufferSize == 4, "");
// TODO:: sanity-check if vectorized memory read/write is allowed on src and dst
} }
__device__ constexpr ThreadwiseGenericTensorSliceCopy_v5() __device__ constexpr ThreadwiseGenericTensorSliceCopy_v5()
...@@ -147,9 +145,6 @@ struct ThreadwiseGenericTensorSliceCopy_v5 ...@@ -147,9 +145,6 @@ struct ThreadwiseGenericTensorSliceCopy_v5
constexpr auto buff_off = constexpr auto buff_off =
ThreadBufferDesc::CalculateOffset(to_multi_index(long_vector_data_begin_id)); ThreadBufferDesc::CalculateOffset(to_multi_index(long_vector_data_begin_id));
// static_assert(buff_off == 0 || buff_off == 1 || buff_off == 2 || buff_off == 3,
// "");
thread_buff.s1(Number<buff_off>{}) = src_buff; thread_buff.s1(Number<buff_off>{}) = src_buff;
}); });
} }
......
...@@ -24,76 +24,25 @@ typedef ushort ushort8_t __attribute__((ext_vector_type(8))); ...@@ -24,76 +24,25 @@ typedef ushort ushort8_t __attribute__((ext_vector_type(8)));
union float_vec2_t union float_vec2_t
{ {
Tuple<float, float> s1; StaticallyIndexedArray<float, 2> s1;
float2_t s2; float2_t s2;
__host__ __device__ constexpr float_vec2_t() {} __host__ __device__ constexpr float_vec2_t() {s2 = {0, 0};}
}; };
union float_vec4_t union float_vec4_t
{ {
Tuple<float, float, float, float> s1; StaticallyIndexedArray<float, 4> s1;
struct{
float e0, e1, e2, e3;
} ss1;
float4_t s4; float4_t s4;
float n[4]; __host__ __device__ constexpr float_vec4_t() {s4 = {0, 0, 0, 0};}
__host__ __device__ constexpr float_vec4_t() {}
template<typename T, index_t i>
__host__ __device__ void set(const T val);
template<>
__host__ __device__ void set<float, 0>(const float val)
{
ss1.e0 = val;
}
template<>
__host__ __device__ void set<float, 1>(const float val)
{
ss1.e1 = val;
}
template<>
__host__ __device__ void set<float, 2>(const float val)
{
ss1.e2 = val;
}
template<>
__host__ __device__ void set<float, 3>(const float val)
{
ss1.e3 = val;
}
}; };
union float_vec8_t union float_vec8_t
{ {
Tuple<float, float, float, float, float, float, float, float> s1; StaticallyIndexedArray<float, 8> s1;
Tuple<float_vec2_t, float_vec2_t, float_vec2_t, float_vec2_t> s2; StaticallyIndexedArray<float_vec2_t, 4> s2;
struct{ StaticallyIndexedArray<float_vec4_t, 2> s4;
float_vec4_t e0;
float_vec4_t e1;
} ss4;
Tuple<float_vec4_t, float_vec4_t> s4;
float8_t s8; float8_t s8;
float n[8];
__host__ __device__ constexpr float_vec8_t() {} __host__ __device__ constexpr float_vec8_t() {}
template<typename T, index_t i>
__host__ __device__ void set(const T val);
template<>
__host__ __device__ void set<float_vec4_t, 0>(const float_vec4_t val)
{
ss4.e0 = val;
}
template<>
__host__ __device__ void set<float_vec4_t, 1>(const float_vec4_t val)
{
ss4.e1 = val;
}
}; };
......
...@@ -7,10 +7,10 @@ BC_FILE=$1 ...@@ -7,10 +7,10 @@ BC_FILE=$1
/opt/rocm/llvm/bin/opt -S -sroa inline.ll > sora.ll /opt/rocm/llvm/bin/opt -S -sroa inline.ll > sora.ll
/opt/rocm/llvm/bin/opt -S -O3 sora.ll > o3.ll /opt/rocm/llvm/bin/opt -S -O3 sora.ll > o3.ll
/opt/rocm/llvm/bin/llc -mcpu=gfx906 original.ll /opt/rocm/llvm/bin/llc -mcpu=gfx908 original.ll
/opt/rocm/llvm/bin/llc -mcpu=gfx906 inline.ll /opt/rocm/llvm/bin/llc -mcpu=gfx908 inline.ll
/opt/rocm/llvm/bin/llc -mcpu=gfx906 sora.ll /opt/rocm/llvm/bin/llc -mcpu=gfx908 sora.ll
/opt/rocm/llvm/bin/llc -mcpu=gfx906 o3.ll /opt/rocm/llvm/bin/llc -mcpu=gfx908 o3.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa inline.ll > o3.ll #/opt/rocm/llvm/bin/opt -S -O3 -sroa inline.ll > o3.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa o3.ll > o3_2.ll #/opt/rocm/llvm/bin/opt -S -O3 -sroa o3.ll > o3_2.ll
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment