Unverified Commit 831021b8 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #384 from InfiniTensor/issue/6/fix

issue/6/fix type conversion
parents 81a5f627 1529608b
...@@ -22,7 +22,7 @@ __device__ void rmsnormBlock( ...@@ -22,7 +22,7 @@ __device__ void rmsnormBlock(
// Thread_0 computes RMS=1/sqrt(ss/dim+epsilon) and stores in shared memory // Thread_0 computes RMS=1/sqrt(ss/dim+epsilon) and stores in shared memory
__shared__ Tcompute rms; __shared__ Tcompute rms;
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
rms = Tdata(rsqrtf(ss / Tcompute(dim) + epsilon)); rms = Tcompute(rsqrtf(ss / Tcompute(dim) + epsilon));
} }
__syncthreads(); __syncthreads();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment