OpenDAS / llama.cpp

Commit e6262690, authored Dec 05, 2024 by xuxzh1
Commit message: fix
Parent: d5e51aa9
Showing 1 changed file with 1 addition and 1 deletion.
ggml/src/ggml-cuda/softmax.cu (+1, −1)
@@ -12,7 +12,7 @@ __device__ float __forceinline__ t2f32<half>(half val) {
 }

 template <bool vals_smem, int ncols_template, int block_size_template, typename T>
-static __global__ void soft_max_f32(const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y, const float scale, const float max_bias, const float m0, const float m1, uint32_t n_head_log2) {
+static __global__ void __launch_bounds__(1024) soft_max_f32(const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y, const float scale, const float max_bias, const float m0, const float m1, uint32_t n_head_log2) {
     const int ncols = ncols_template == 0 ? ncols_par : ncols_template;
     const int tid  = threadIdx.x;
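For context (this note is not part of the commit): the one-line change adds the `__launch_bounds__(1024)` qualifier to the `soft_max_f32` kernel. This qualifier promises the CUDA compiler that the kernel will never be launched with more than 1024 threads per block, which lets it budget registers for that bound instead of a pessimistic worst case. A minimal standalone sketch of the same pattern, with a hypothetical kernel name, might look like:

```cuda
#include <cstdio>

// Sketch only, not from the commit: __launch_bounds__(1024) tells nvcc this
// kernel is never launched with more than 1024 threads per block, so it can
// allocate registers per thread accordingly. Launching with a larger block
// size than promised is undefined behavior.
static __global__ void __launch_bounds__(1024) scale_f32(float * dst, const float * src, float scale, int n) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        dst[i] = src[i] * scale;
    }
}

int main() {
    const int n = 1 << 20;
    float *src, *dst;
    cudaMallocManaged(&src, n * sizeof(float));
    cudaMallocManaged(&dst, n * sizeof(float));
    for (int i = 0; i < n; ++i) src[i] = 1.0f;

    // 1024 threads per block matches the __launch_bounds__ promise.
    scale_f32<<<(n + 1023) / 1024, 1024>>>(dst, src, 0.5f, n);
    cudaDeviceSynchronize();

    printf("dst[0] = %f\n", dst[0]);
    cudaFree(src);
    cudaFree(dst);
    return 0;
}
```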