Commit 8a7e325c authored by Shucai Xiao
Browse files

more refinement to use fma for mul_add

parent fdc0ae82
...@@ -21,7 +21,7 @@ __global__ void mul_add_kernel_dim3(void* a, void* x, void* b, int dim3, void* r ...@@ -21,7 +21,7 @@ __global__ void mul_add_kernel_dim3(void* a, void* x, void* b, int dim3, void* r
if(id < n) if(id < n)
{ {
auto id1 = id % dim3; auto id1 = id % dim3;
hr[id] = __hadd2(__hmul2(ha[id], hx[id1]), hb[id1]); hr[id] = __hfma2(ha[id], hx[id1], hb[id1]);
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment