增加对 input tensor size > 2^31 的支持（索引 idx 由 int 改为 int64_t）

305b5a09 · zhuwenwen · 1a493a24 · 305b5a09
Commit 305b5a09 authored Nov 20, 2024 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

csrc/opt/layernorm_kernels_opt.cu csrc/opt/layernorm_kernels_opt.cu +2 -2

No files found.
--- a/csrc/opt/layernorm_kernels_opt.cu
+++ b/csrc/opt/layernorm_kernels_opt.cu
@@ -342,7 +342,7 @@ __global__ void fused_add_rms_kernel_opt(scalar_t* input,scalar_t* residual,scal
  scalar_t intput_vec[Vec];
  scalar_t residual_vec[Vec];
  T_ACC trstd;
-  int idx = i * tcol + j;
+  int64_t idx = i * tcol + j;
  idx*=Vec;
  if (j < tcol) {
    *(LoadT*)intput_vec = *(LoadT*)(input+idx);
@@ -381,7 +381,7 @@ __global__ void fused_rms_kernel_opt(scalar_t* input,scalar_t* output,scalar_t*
  using LoadT = at::native::memory::aligned_vector<scalar_t, Vec>;
  scalar_t intput_vec[Vec];
  T_ACC trstd;
-  int idx = i * tcol + j;
+  int64_t idx = i * tcol + j;
  idx*=Vec;
  if (j < tcol) {
    *(LoadT*)intput_vec = *(LoadT*)(input+idx);