update norm cu (#3048)

9f8f2c7f · Yineng Zhang · GitHub · 6fc37bd8 · 9f8f2c7f · 6fc37bd8
Unverified commit 9f8f2c7f, authored Jan 22, 2025 by Yineng Zhang; committed by GitHub on Jan 22, 2025
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 1 addition and 29 deletions

sgl-kernel/setup.py sgl-kernel/setup.py +1 -1

sgl-kernel/src/sgl-kernel/csrc/norm.cu sgl-kernel/src/sgl-kernel/csrc/norm.cu +0 -28

No files found.
--- a/sgl-kernel/setup.py
+++ b/sgl-kernel/setup.py
@@ -91,7 +91,7 @@ ext_modules = [
            "src/sgl-kernel/csrc/sampling_scaling_penalties.cu",
            "src/sgl-kernel/csrc/sgl_kernel_ops.cu",
            "src/sgl-kernel/csrc/rotary_embedding.cu",
-            "src/sgl-kernel/csrc/norm.cu",
+            "3rdparty/flashinfer/csrc/norm.cu",
        ],
        include_dirs=include_dirs,
        extra_compile_args={

--- a/sgl-kernel/src/sgl-kernel/csrc/norm.cu
+++ b/sgl-kernel/src/sgl-kernel/csrc/norm.cu
-#include <cstdint>
-#include <flashinfer/norm.cuh>
-#include "pytorch_extension_utils.h"
-using namespace flashinfer;
-void rmsnorm(at::Tensor& output, at::Tensor& input, at::Tensor& weight, double eps, int64_t cuda_stream) {
-  CHECK_INPUT(input);
-  CHECK_INPUT(weight);
-  auto device = input.device();
-  CHECK_EQ(weight.device(), device);
-  CHECK_DIM(2, input);   // input: (batch_size, hidden_size)
-  CHECK_DIM(1, weight);  // weight: (hidden_size)
-  CHECK_EQ(input.size(1), weight.size(0));
-  unsigned int batch_size = input.size(0);
-  unsigned int hidden_size = input.size(1);
-  CHECK_EQ(output.size(0), batch_size);
-  CHECK_EQ(output.size(1), hidden_size);
-  cudaStream_t stream = reinterpret_cast<cudaStream_t>(cuda_stream);
-  DISPATCH_PYTORCH_DTYPE_TO_CTYPE_FP16(input.scalar_type(), c_type, [&] {
-    cudaError_t status = norm::RMSNorm(static_cast<c_type*>(input.data_ptr()), static_cast<c_type*>(weight.data_ptr()),
-                                       static_cast<c_type*>(output.data_ptr()), batch_size, hidden_size, eps, stream);
-    TORCH_CHECK(status == cudaSuccess, "RMSNorm failed with error code " + std::string(cudaGetErrorString(status)));
-    return true;
-  });
-}