Commit a6477298 authored by Shucai Xiao
Browse files

clang format

parent fe849702
......@@ -215,8 +215,11 @@ __device__ __half2 block_reduce_half2(
// m = x - mean(x)
// m / sqrt(mean(m ^ 2) + 1e-12)
__device__ void layernorm_kernel_half2(__half2* in_data, __half2* in_data_reduce,
__half2* out, index_int batch_item_num, index_int block_size,
__device__ void layernorm_kernel_half2(__half2* in_data,
__half2* in_data_reduce,
__half2* out,
index_int batch_item_num,
index_int block_size,
float rbatch_num)
{
auto rnum = __float2half2_rn(rbatch_num);
......@@ -289,8 +292,12 @@ block_reduce_half(T* buffer, index_int batch_item_num, index_int tid, index_int
// m = x - mean(x)
// m / sqrt(mean(m ^ 2) + 1e-12)
__device__ void layernorm_kernel_half(__half* in_data, __half* in_data_reduce, __half* out,
index_int batch_item_num, index_int block_size, float rnum)
__device__ void layernorm_kernel_half(__half* in_data,
__half* in_data_reduce,
__half* out,
index_int batch_item_num,
index_int block_size,
float rnum)
{
auto m = block_reduce_half(in_data_reduce, batch_item_num, threadIdx.x, block_size);
m *= rnum;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment