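fix build error: restore the missing line-continuation backslash in the LAUNCH_ACTIVATION_GATE_KERNEL macro (csrc/opt/activation_kernels_opt.cu), remove the commented-out fatrelu code, exclude csrc/quantization/activation_kernels.cu from VLLM_EXT_SRC, and set TORCH_SUPPORTED_VERSION_ROCM to 2.4.1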

e87de76c · zhuwenwen · 4c676e3d · e87de76c · e87de76c
Commit e87de76c authored Jun 21, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 44 deletions

CMakeLists.txt CMakeLists.txt +2 -2

csrc/opt/activation_kernels_opt.cu csrc/opt/activation_kernels_opt.cu +2 -42

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,7 +51,7 @@ set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx
 # versions are derived from docker/Dockerfile.rocm
 #
 set(TORCH_SUPPORTED_VERSION_CUDA "2.7.0")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.4.1")
 #
 # Try to find python package with an executable that exactly matches
@@ -263,7 +263,7 @@ set(VLLM_EXT_SRC
  # "csrc/quantization/fp8/common.cu"
  # "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
  "csrc/quantization/gguf/gguf_kernel.cu"
-  "csrc/quantization/activation_kernels.cu"
+  # "csrc/quantization/activation_kernels.cu"
  "csrc/cuda_utils_kernels.cu"
  "csrc/prepare_inputs/advance_step.cu"
  "csrc/custom_all_reduce.cu"

--- a/csrc/opt/activation_kernels_opt.cu
+++ b/csrc/opt/activation_kernels_opt.cu
@@ -115,44 +115,10 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
  float inner = BETA * (f + KAPPA * x_cube);
  return (T)(0.5f * f * (1.0f + ::tanhf(inner)));
 }
-// template <typename T>
-// __device__ __forceinline__ T fatrelu_kernel(const T& x, const float threshold) {
-//   const float f = (float)x;
-//   return (T)(f > threshold ? f : 0.0f);
-// }
-// template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&, const float)>
-// __global__ void act_and_mul_kernel_with_param(
-//     scalar_t* __restrict__ out, const scalar_t* __restrict__ input, const int d,
-//     const float param) {
-//   const int64_t token_idx = blockIdx.x;
-//   for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
-//     const scalar_t x = VLLM_LDG(&input[token_idx * 2 * d + idx]);
-//     const scalar_t y = VLLM_LDG(&input[token_idx * 2 * d + d + idx]);
-//     out[token_idx * d + idx] = ACT_FN(x, param) * y;
-//   }
-//     }
 }  // namespace vllm
-// #define LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(KERNEL, PARAM)         \
-//   int d = input.size(-1) / 2;                                           \
-//   int64_t num_tokens = input.numel() / input.size(-1);                  \
-//   dim3 grid(num_tokens);                                                \
-//   dim3 block(std::min(d, 1024));                                        \
-//   const at::cuda::OptionalCUDAGuard device_guard(device_of(input));     \
-//   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();         \
-//   VLLM_DISPATCH_FLOATING_TYPES(                                         \
-//       input.scalar_type(), "act_and_mul_kernel_with_param", [&] {       \
-//         vllm::act_and_mul_kernel_with_param<scalar_t, KERNEL<scalar_t>> \
-//             <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),      \
-//                                          input.data_ptr<scalar_t>(), d, \
-//                                          PARAM);                        \
-//       });
 // Launch activation and gating kernel.
 // Use ACT_FIRST (bool) indicating whether to apply the activation function
 // first.
@@ -163,7 +129,7 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
  dim3 block(std::min(d, 1024));                                               \
  if (num_tokens == 0) {                                                       \
    return;                                                                    \
-  }  
+  }                                                                            \
  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));            \
  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();                \
  VLLM_DISPATCH_FLOATING_TYPES(                                                \
@@ -221,10 +187,4 @@ void gelu_tanh_and_mul_opt(torch::Tensor& out,    // [..., d]
                       torch::Tensor& input)  // [..., 2 * d]
 {
  LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel, true);
 }
\ No newline at end of file
-// void fatrelu_and_mul_opt(torch::Tensor& out,    // [..., d],
-//                      torch::Tensor& input,  // [..., 2 * d]
-//                      double threshold) {
-//   LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(vllm::fatrelu_kernel, threshold);
-// }
\ No newline at end of file