"docs/git@developer.sourcefind.cn:change/sglang.git" did not exist on "5a9a4f41c695daa8b46c25abe8200117e68fbab2"
Unverified commit fcbb9788, authored by Chao Liu and committed by GitHub
Browse files

Dynamic tensor descriptor (#24)



* support dynamic tensor descriptor

* use buffer load OOB feature for padding case

* add navi support

* add int8x4 inference kernel
Co-authored-by: Chao Liu <chao@ixt-rack-81.local.lan>
Co-authored-by: Jing Zhang <jizhan@amd.com>
parent bbcb67d0
conv_driver.cpp
\ No newline at end of file
...@@ -508,8 +508,8 @@ template <bool B> ...@@ -508,8 +508,8 @@ template <bool B>
struct bool_type : std::integral_constant<bool, B> struct bool_type : std::integral_constant<bool, B>
{ {
}; };
using std::true_type;
using std::false_type; using std::false_type;
using std::true_type;
/// Type traits for floating-point types. /// Type traits for floating-point types.
template <typename T> template <typename T>
...@@ -854,8 +854,8 @@ inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y, ...@@ -854,8 +854,8 @@ inline HALF_CONSTEXPR_NOERR unsigned int signal(unsigned int x, unsigned int y,
((x & 0x7FFF) > 0x7C00 && !(x & 0x200)) || ((y & 0x7FFF) > 0x7C00 && !(y & 0x200)) || ((x & 0x7FFF) > 0x7C00 && !(x & 0x200)) || ((y & 0x7FFF) > 0x7C00 && !(y & 0x200)) ||
((z & 0x7FFF) > 0x7C00 && !(z & 0x200))); ((z & 0x7FFF) > 0x7C00 && !(z & 0x200)));
#endif #endif
return ((x & 0x7FFF) > 0x7C00) ? (x | 0x200) : ((y & 0x7FFF) > 0x7C00) ? (y | 0x200) return ((x & 0x7FFF) > 0x7C00) ? (x | 0x200)
: (z | 0x200); : ((y & 0x7FFF) > 0x7C00) ? (y | 0x200) : (z | 0x200);
} }
/// Select value or signaling NaN. /// Select value or signaling NaN.
...@@ -1756,9 +1756,9 @@ uint32 mulhi(uint32 x, uint32 y) ...@@ -1756,9 +1756,9 @@ uint32 mulhi(uint32 x, uint32 y)
uint32 xy = (x >> 16) * (y & 0xFFFF), yx = (x & 0xFFFF) * (y >> 16), uint32 xy = (x >> 16) * (y & 0xFFFF), yx = (x & 0xFFFF) * (y >> 16),
c = (xy & 0xFFFF) + (yx & 0xFFFF) + (((x & 0xFFFF) * (y & 0xFFFF)) >> 16); c = (xy & 0xFFFF) + (yx & 0xFFFF) + (((x & 0xFFFF) * (y & 0xFFFF)) >> 16);
return (x >> 16) * (y >> 16) + (xy >> 16) + (yx >> 16) + (c >> 16) + return (x >> 16) * (y >> 16) + (xy >> 16) + (yx >> 16) + (c >> 16) +
((R == std::round_to_nearest) ? ((c >> 15) & 1) : (R == std::round_toward_infinity) ((R == std::round_to_nearest)
? ((c & 0xFFFF) != 0) ? ((c >> 15) & 1)
: 0); : (R == std::round_toward_infinity) ? ((c & 0xFFFF) != 0) : 0);
} }
/// 64-bit multiplication. /// 64-bit multiplication.
...@@ -2247,7 +2247,7 @@ unsigned int area(unsigned int arg) ...@@ -2247,7 +2247,7 @@ unsigned int area(unsigned int arg)
{ {
if(expy < 0) if(expy < 0)
{ {
r = 0x40000000 + ((expy > -30) ? ((r >> -expy) | r = 0x40000000 + ((expy > -30) ? ((r >> -expy) |
((r & ((static_cast<uint32>(1) << -expy) - 1)) != 0)) ((r & ((static_cast<uint32>(1) << -expy) - 1)) != 0))
: 1); : 1);
expy = 0; expy = 0;
...@@ -2379,10 +2379,12 @@ unsigned int erf(unsigned int arg) ...@@ -2379,10 +2379,12 @@ unsigned int erf(unsigned int arg)
t / t /
((x2.exp < 0) ? f31(exp2((x2.exp > -32) ? (x2.m >> -x2.exp) : 0, 30), 0) ((x2.exp < 0) ? f31(exp2((x2.exp > -32) ? (x2.m >> -x2.exp) : 0, 30), 0)
: f31(exp2((x2.m << x2.exp) & 0x7FFFFFFF, 22), x2.m >> (31 - x2.exp))); : f31(exp2((x2.m << x2.exp) & 0x7FFFFFFF, 22), x2.m >> (31 - x2.exp)));
return (!C || sign) ? fixed2half<R, 31, false, true, true>( return (!C || sign)
0x80000000 - (e.m >> (C - e.exp)), 14 + C, sign & (C - 1U)) ? fixed2half<R, 31, false, true, true>(
: (e.exp < -25) ? underflow<R>() : fixed2half<R, 30, false, false, true>( 0x80000000 - (e.m >> (C - e.exp)), 14 + C, sign & (C - 1U))
e.m >> 1, e.exp + 14, 0, e.m & 1); : (e.exp < -25)
? underflow<R>()
: fixed2half<R, 30, false, false, true>(e.m >> 1, e.exp + 14, 0, e.m & 1);
} }
/// Gamma function and postprocessing. /// Gamma function and postprocessing.
...@@ -2402,8 +2404,7 @@ unsigned int gamma(unsigned int arg) ...@@ -2402,8 +2404,7 @@ unsigned int gamma(unsigned int arg)
for(unsigned int i=0; i<5; ++i) for(unsigned int i=0; i<5; ++i)
s += p[i+1] / (arg+i); s += p[i+1] / (arg+i);
return std::log(s) + (arg-0.5)*std::log(t) - t; return std::log(s) + (arg-0.5)*std::log(t) - t;
*/ static const f31 */ static const f31 pi(0xC90FDAA2, 1),
pi(0xC90FDAA2, 1),
lbe(0xB8AA3B29, 0); lbe(0xB8AA3B29, 0);
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000; unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
bool bsign = sign != 0; bool bsign = sign != 0;
...@@ -2490,7 +2491,7 @@ unsigned int gamma(unsigned int arg) ...@@ -2490,7 +2491,7 @@ unsigned int gamma(unsigned int arg)
{ {
if(z.exp < 0) if(z.exp < 0)
s = s * z; s = s * z;
s = pi / s; s = pi / s;
if(s.exp < -24) if(s.exp < -24)
return underflow<R>(sign); return underflow<R>(sign);
} }
...@@ -2789,7 +2790,7 @@ inline half operator"" _h(long double value) ...@@ -2789,7 +2790,7 @@ inline half operator"" _h(long double value)
{ {
return half(detail::binary, detail::float2half<half::round_style>(value)); return half(detail::binary, detail::float2half<half::round_style>(value));
} }
} } // namespace literal
#endif #endif
namespace detail { namespace detail {
...@@ -2837,8 +2838,8 @@ struct half_caster<half, half, R> ...@@ -2837,8 +2838,8 @@ struct half_caster<half, half, R>
{ {
static half cast(half arg) { return arg; } static half cast(half arg) { return arg; }
}; };
} } // namespace detail
} } // namespace half_float
/// Extensions to the C++ standard library. /// Extensions to the C++ standard library.
namespace std { namespace std {
...@@ -3003,7 +3004,7 @@ struct hash<half_float::half> ...@@ -3003,7 +3004,7 @@ struct hash<half_float::half>
} }
}; };
#endif #endif
} } // namespace std
namespace half_float { namespace half_float {
/// \anchor compop /// \anchor compop
...@@ -3122,13 +3123,14 @@ inline half operator+(half x, half y) ...@@ -3122,13 +3123,14 @@ inline half operator+(half x, half y)
return half(detail::binary, return half(detail::binary,
(absx > 0x7C00 || absy > 0x7C00) (absx > 0x7C00 || absy > 0x7C00)
? detail::signal(x.data_, y.data_) ? detail::signal(x.data_, y.data_)
: (absy != 0x7C00) ? x.data_ : (sub && absx == 0x7C00) ? detail::invalid() : (absy != 0x7C00) ? x.data_
: y.data_); : (sub && absx == 0x7C00) ? detail::invalid() : y.data_);
if(!absx) if(!absx)
return absy ? y : half(detail::binary, return absy ? y
(half::round_style == std::round_toward_neg_infinity) : half(detail::binary,
? (x.data_ | y.data_) (half::round_style == std::round_toward_neg_infinity)
: (x.data_ & y.data_)); ? (x.data_ | y.data_)
: (x.data_ & y.data_));
if(!absy) if(!absy)
return x; return x;
unsigned int sign = ((sub && absy > absx) ? y.data_ : x.data_) & 0x8000; unsigned int sign = ((sub && absy > absx) ? y.data_ : x.data_) & 0x8000;
...@@ -3449,10 +3451,11 @@ inline half fma(half x, half y, half z) ...@@ -3449,10 +3451,11 @@ inline half fma(half x, half y, half z)
: (sign | 0x7C00)) : (sign | 0x7C00))
: z; : z;
if(!absx || !absy) if(!absx || !absy)
return absz ? z : half(detail::binary, return absz
(half::round_style == std::round_toward_neg_infinity) ? z
? (z.data_ | sign) : half(detail::binary,
: (z.data_ & sign)); (half::round_style == std::round_toward_neg_infinity) ? (z.data_ | sign)
: (z.data_ & sign));
for(; absx < 0x400; absx <<= 1, --exp) for(; absx < 0x400; absx <<= 1, --exp)
; ;
for(; absy < 0x400; absy <<= 1, --exp) for(; absy < 0x400; absy <<= 1, --exp)
...@@ -3516,9 +3519,8 @@ inline half fma(half x, half y, half z) ...@@ -3516,9 +3519,8 @@ inline half fma(half x, half y, half z)
inline HALF_CONSTEXPR_NOERR half fmax(half x, half y) inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
{ {
return half(detail::binary, return half(detail::binary,
(!isnan(y) && (isnan(x) || (!isnan(y) && (isnan(x) || (x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) <
(x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) < (y.data_ ^ (0x8000 | (0x8000 - (y.data_ >> 15))))))
(y.data_ ^ (0x8000 | (0x8000 - (y.data_ >> 15))))))
? detail::select(y.data_, x.data_) ? detail::select(y.data_, x.data_)
: detail::select(x.data_, y.data_)); : detail::select(x.data_, y.data_));
} }
...@@ -3533,9 +3535,8 @@ inline HALF_CONSTEXPR_NOERR half fmax(half x, half y) ...@@ -3533,9 +3535,8 @@ inline HALF_CONSTEXPR_NOERR half fmax(half x, half y)
inline HALF_CONSTEXPR_NOERR half fmin(half x, half y) inline HALF_CONSTEXPR_NOERR half fmin(half x, half y)
{ {
return half(detail::binary, return half(detail::binary,
(!isnan(y) && (isnan(x) || (!isnan(y) && (isnan(x) || (x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) >
(x.data_ ^ (0x8000 | (0x8000 - (x.data_ >> 15)))) > (y.data_ ^ (0x8000 | (0x8000 - (y.data_ >> 15))))))
(y.data_ ^ (0x8000 | (0x8000 - (y.data_ >> 15))))))
? detail::select(y.data_, x.data_) ? detail::select(y.data_, x.data_)
: detail::select(x.data_, y.data_)); : detail::select(x.data_, y.data_));
} }
...@@ -3886,9 +3887,9 @@ inline half log1p(half arg) ...@@ -3886,9 +3887,9 @@ inline half log1p(half arg)
#else #else
if(arg.data_ >= 0xBC00) if(arg.data_ >= 0xBC00)
return half(detail::binary, return half(detail::binary,
(arg.data_ == 0xBC00) ? detail::pole(0x8000) : (arg.data_ <= 0xFC00) (arg.data_ == 0xBC00)
? detail::invalid() ? detail::pole(0x8000)
: detail::signal(arg.data_)); : (arg.data_ <= 0xFC00) ? detail::invalid() : detail::signal(arg.data_));
int abs = arg.data_ & 0x7FFF, exp = -15; int abs = arg.data_ & 0x7FFF, exp = -15;
if(!abs || abs >= 0x7C00) if(!abs || abs >= 0x7C00)
return (abs > 0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg; return (abs > 0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
...@@ -4395,7 +4396,7 @@ inline half cos(half arg) ...@@ -4395,7 +4396,7 @@ inline half cos(half arg)
if(half::round_style != std::round_to_nearest && abs == 0x598C) if(half::round_style != std::round_to_nearest && abs == 0x598C)
return half(detail::binary, detail::rounded<half::round_style, true>(0x80FC, 1, 1)); return half(detail::binary, detail::rounded<half::round_style, true>(0x80FC, 1, 1));
std::pair<detail::uint32, detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28); std::pair<detail::uint32, detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
detail::uint32 sign = -static_cast<detail::uint32>(((k >> 1) ^ k) & 1); detail::uint32 sign = -static_cast<detail::uint32>(((k >> 1) ^ k) & 1);
return half(detail::binary, return half(detail::binary,
detail::fixed2half<half::round_style, 30, true, true, true>( detail::fixed2half<half::round_style, 30, true, true, true>(
(((k & 1) ? sc.first : sc.second) ^ sign) - sign)); (((k & 1) ? sc.first : sc.second) ^ sign) - sign));
...@@ -4439,7 +4440,7 @@ inline half tan(half arg) ...@@ -4439,7 +4440,7 @@ inline half tan(half arg)
} }
std::pair<detail::uint32, detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30); std::pair<detail::uint32, detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30);
if(k & 1) if(k & 1)
sc = std::make_pair(-sc.second, sc.first); sc = std::make_pair(-sc.second, sc.first);
detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second); detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second);
detail::uint32 my = (sc.first ^ signy) - signy, mx = (sc.second ^ signx) - signx; detail::uint32 my = (sc.first ^ signy) - signy, mx = (sc.second ^ signx) - signx;
for(; my < 0x80000000; my <<= 1, --exp) for(; my < 0x80000000; my <<= 1, --exp)
...@@ -4517,7 +4518,7 @@ inline half acos(half arg) ...@@ -4517,7 +4518,7 @@ inline half acos(half arg)
? detail::invalid() ? detail::invalid()
: sign ? detail::rounded<half::round_style, true>(0x4248, 0, 1) : 0); : sign ? detail::rounded<half::round_style, true>(0x4248, 0, 1) : 0);
std::pair<detail::uint32, detail::uint32> cs = detail::atan2_args(abs); std::pair<detail::uint32, detail::uint32> cs = detail::atan2_args(abs);
detail::uint32 m = detail::atan2(cs.second, cs.first, 28); detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
return half(detail::binary, return half(detail::binary,
detail::fixed2half<half::round_style, 31, false, true, true>( detail::fixed2half<half::round_style, 31, false, true, true>(
sign ? (0xC90FDAA2 - m) : m, 15, 0, sign)); sign ? (0xC90FDAA2 - m) : m, 15, 0, sign));
...@@ -5354,13 +5355,13 @@ inline HALF_CONSTEXPR half copysign(half x, half y) ...@@ -5354,13 +5355,13 @@ inline HALF_CONSTEXPR half copysign(half x, half y)
/// \retval FP_NORMAL for all other (normal) values /// \retval FP_NORMAL for all other (normal) values
inline HALF_CONSTEXPR int fpclassify(half arg) inline HALF_CONSTEXPR int fpclassify(half arg)
{ {
return !(arg.data_ & 0x7FFF) ? FP_ZERO : ((arg.data_ & 0x7FFF) < 0x400) return !(arg.data_ & 0x7FFF)
? FP_SUBNORMAL ? FP_ZERO
: ((arg.data_ & 0x7FFF) < 0x7C00) : ((arg.data_ & 0x7FFF) < 0x400)
? FP_NORMAL ? FP_SUBNORMAL
: ((arg.data_ & 0x7FFF) == 0x7C00) : ((arg.data_ & 0x7FFF) < 0x7C00)
? FP_INFINITE ? FP_NORMAL
: FP_NAN; : ((arg.data_ & 0x7FFF) == 0x7C00) ? FP_INFINITE : FP_NAN;
} }
/// Check if finite number. /// Check if finite number.
...@@ -5652,7 +5653,7 @@ inline void fethrowexcept(int excepts, const char* msg = "") ...@@ -5652,7 +5653,7 @@ inline void fethrowexcept(int excepts, const char* msg = "")
throw std::range_error(msg); throw std::range_error(msg);
} }
/// \} /// \}
} } // namespace half_float
#undef HALF_UNUSED_NOERR #undef HALF_UNUSED_NOERR
#undef HALF_CONSTEXPR #undef HALF_CONSTEXPR
......
...@@ -3,19 +3,23 @@ rm -f CMakeCache.txt ...@@ -3,19 +3,23 @@ rm -f CMakeCache.txt
rm -f *.cmake rm -f *.cmake
rm -rf CMakeFiles rm -rf CMakeFiles
MY_PROJECT_SOURCE=../../../ MY_PROJECT_SOURCE=../
MY_PROJECT_INSTALL=../install.dir MY_PROJECT_INSTALL=../install.dir
cmake \ cmake \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \ -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Release \
-D DEVICE_BACKEND="AMD" \ -D DEVICE_BACKEND="AMD" \
-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \ -D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx1030 -gline-tables-only -save-temps=$CWD -ftemplate-backtrace-limit=0" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \ -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH="/opt/rocm" \ -D CMAKE_PREFIX_PATH="/opt/rocm" \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
${MY_PROJECT_SOURCE} ${MY_PROJECT_SOURCE}
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -save-temps=$CWD" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -gline-tables-only -save-temps=$CWD" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps=$CWD" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \ #-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps" \ #-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps=$CWD" \
#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -v -gline-tables-only -save-temps" \ #-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -v -gline-tables-only -save-temps=$CWD" \
#!/bin/bash
# Print, for every token v0 through v255, the number of lines in FILE that
# contain that token as a whole word.
#
# Usage:  <this script> FILE
# Output: 256 lines of the form "vN COUNT", in ascending N.
#
# NOTE(review): the tokens look like GPU vector-register names and FILE is
# presumably a compiler-generated assembly listing -- confirm with the author.

if [ "$#" -lt 1 ]; then
    # Without a file operand grep would silently read stdin; refuse instead.
    echo "usage: $0 FILE" >&2
    exit 1
fi

FILE=$1

for (( idx = 0; idx <= 255; ++idx )); do
    reg="v${idx}"
    # grep -w matches whole words only, so "v1" does not also count "v10".
    # The command substitution is left unquoted on purpose: word splitting
    # strips any padding wc may emit, keeping the output as "vN COUNT".
    echo "${reg}" $( grep -w "${reg}" "${FILE}" | wc -l )
done
# Disassemble an LLVM bitcode file, run it through a fixed chain of opt
# passes (inline -> sroa -> O3) and feed every stage to llc, so the stages
# can be compared side by side.
#
# Usage:    <this script> BC_FILE
# Produces: original.ll, inline.ll, sroa.ll and o3.ll in the current
#           directory, plus whatever llc emits for each of them.

if [ "$#" -lt 1 ]; then
    # Check before deleting anything; with no operand llvm-dis would read stdin.
    echo "usage: $0 BC_FILE" >&2
    exit 1
fi

# Clear results of a previous run; -f keeps a first run (nothing to delete) quiet.
rm -f -- *.ll *.s

BC_FILE=$1
LLVM_BIN=/opt/rocm/llvm/bin

"${LLVM_BIN}/llvm-dis" "${BC_FILE}" -o original.ll

"${LLVM_BIN}/opt" -S -inline -inline-threshold=104857 original.ll > inline.ll
"${LLVM_BIN}/opt" -S -sroa inline.ll > sroa.ll
"${LLVM_BIN}/opt" -S -O3 sroa.ll > o3.ll

# Compile every stage for the same target so the results are comparable.
for stage in original inline sroa o3; do
    "${LLVM_BIN}/llc" -mcpu=gfx906 "${stage}.ll"
done

# Earlier experiments (repeated O3+sroa rounds, gfx908 target), kept for reference:
#/opt/rocm/llvm/bin/opt -S -O3 -sroa inline.ll > o3.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa o3.ll > o3_2.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa o3_2.ll > o3_3.ll
#/opt/rocm/llvm/bin/opt -S -O3 -sroa o3_3.ll > o3_4.ll
#/opt/rocm/llvm/bin/llc -mcpu=gfx908 opt.ll
#/opt/rocm/llvm/bin/llc -mcpu=gfx908 inline.ll
#/opt/rocm/llvm/bin/llc -mcpu=gfx908 o3.ll
#/opt/rocm/llvm/bin/llc -mcpu=gfx908 o3_2.ll
#/opt/rocm/llvm/bin/llc -mcpu=gfx908 o3_3.ll
#/opt/rocm/llvm/bin/llc -mcpu=gfx908 o3_4.ll
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment