Commit 320bc57b authored by Chao Liu
Browse files

update

parent 8fecf4c3
...@@ -52,12 +52,12 @@ struct BiasReluAdd ...@@ -52,12 +52,12 @@ struct BiasReluAdd
} }
}; };
struct BiasAdd struct DoSomething
{ {
#if 1 #if 1
// correct result // correct result
// no scratch memory, good VGPR allocation (59) // no scratch memory, good VGPR allocation (59)
// good perf (101Tflops) // good perf (101Tflops @ 1089MHz)
__host__ __device__ constexpr float operator()(float v0, ck::half_t v1, ck::half_t v2) const __host__ __device__ constexpr float operator()(float v0, ck::half_t v1, ck::half_t v2) const
{ {
constexpr float alpha = 0.1; constexpr float alpha = 0.1;
...@@ -80,21 +80,20 @@ struct BiasAdd ...@@ -80,21 +80,20 @@ struct BiasAdd
// wrong result // wrong result
// lots of scratch memory // lots of scratch memory
// huge perf drop // huge perf drop
template <typename T1, typename T2> __host__ __device__ constexpr float operator()(float v0, ck::half_t v1, ck::half_t v2) const
__host__ __device__ constexpr float operator()(float v0, T1 v1, T2 v2) const
{ {
return alpha * v0 + beta * v1 + gamma * v2; return alpha * v0 + beta * v1 + gamma * v2;
} }
#elif 0 #elif 0
// correct result // correct result
// some scratch memory (68 dword) // some scratch memory (68 dword)
// some perf drop (94Tflops) // some perf drop (94Tflops @ 1089MHz)
// fp64 instructions are used // fp64 instructions are used
__host__ __device__ constexpr auto operator()(float v0, ck::half_t v1, ck::half_t v2) const __host__ __device__ constexpr auto operator()(float v0, ck::half_t v1, ck::half_t v2) const
{ {
return 0.1 * v0 + 0.2 * v1 + 0.3 * v2; return 0.1 * v0 + 0.2 * v1 + 0.3 * v2;
} }
#elif 0 #elif 1
// wrong result // wrong result
// lots of scratch memory // lots of scratch memory
// huge perf drop // huge perf drop
...@@ -128,7 +127,11 @@ using CLayout = ck::tensor_layout::gemm::RowMajor; ...@@ -128,7 +127,11 @@ using CLayout = ck::tensor_layout::gemm::RowMajor;
using AOp = PassThrough; using AOp = PassThrough;
using BOp = PassThrough; using BOp = PassThrough;
using COp = BiasReluAdd; #if 1
using COp = BiasReLuAdd
#else
using COp = DoSomething;
#endif
// Compilation parameters for NT problem // Compilation parameters for NT problem
// clang-format off // clang-format off
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment