Commit e6262690 authored by xuxzh1 🎱
Browse files

fix

parent d5e51aa9
...@@ -12,7 +12,7 @@ __device__ float __forceinline__ t2f32<half>(half val) { ...@@ -12,7 +12,7 @@ __device__ float __forceinline__ t2f32<half>(half val) {
} }
template <bool vals_smem, int ncols_template, int block_size_template, typename T> template <bool vals_smem, int ncols_template, int block_size_template, typename T>
static __global__ void soft_max_f32(const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y, const float scale, const float max_bias, const float m0, const float m1, uint32_t n_head_log2) { static __global__ void __launch_bounds__(1024) soft_max_f32(const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y, const float scale, const float max_bias, const float m0, const float m1, uint32_t n_head_log2) {
const int ncols = ncols_template == 0 ? ncols_par : ncols_template; const int ncols = ncols_template == 0 ? ncols_par : ncols_template;
const int tid = threadIdx.x; const int tid = threadIdx.x;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment