Commit e87de76c authored by zhuwenwen
Browse files

fix build error

parent 4c676e3d
...@@ -51,7 +51,7 @@ set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx ...@@ -51,7 +51,7 @@ set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx
# versions are derived from docker/Dockerfile.rocm # versions are derived from docker/Dockerfile.rocm
# #
set(TORCH_SUPPORTED_VERSION_CUDA "2.7.0") set(TORCH_SUPPORTED_VERSION_CUDA "2.7.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0") set(TORCH_SUPPORTED_VERSION_ROCM "2.4.1")
# #
# Try to find python package with an executable that exactly matches # Try to find python package with an executable that exactly matches
...@@ -263,7 +263,7 @@ set(VLLM_EXT_SRC ...@@ -263,7 +263,7 @@ set(VLLM_EXT_SRC
# "csrc/quantization/fp8/common.cu" # "csrc/quantization/fp8/common.cu"
# "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu" # "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
"csrc/quantization/gguf/gguf_kernel.cu" "csrc/quantization/gguf/gguf_kernel.cu"
"csrc/quantization/activation_kernels.cu" # "csrc/quantization/activation_kernels.cu"
"csrc/cuda_utils_kernels.cu" "csrc/cuda_utils_kernels.cu"
"csrc/prepare_inputs/advance_step.cu" "csrc/prepare_inputs/advance_step.cu"
"csrc/custom_all_reduce.cu" "csrc/custom_all_reduce.cu"
......
...@@ -116,43 +116,9 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) { ...@@ -116,43 +116,9 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
return (T)(0.5f * f * (1.0f + ::tanhf(inner))); return (T)(0.5f * f * (1.0f + ::tanhf(inner)));
} }
// template <typename T>
// __device__ __forceinline__ T fatrelu_kernel(const T& x, const float threshold) {
// const float f = (float)x;
// return (T)(f > threshold ? f : 0.0f);
// }
// template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&, const float)>
// __global__ void act_and_mul_kernel_with_param(
// scalar_t* __restrict__ out, const scalar_t* __restrict__ input, const int d,
// const float param) {
// const int64_t token_idx = blockIdx.x;
// for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
// const scalar_t x = VLLM_LDG(&input[token_idx * 2 * d + idx]);
// const scalar_t y = VLLM_LDG(&input[token_idx * 2 * d + d + idx]);
// out[token_idx * d + idx] = ACT_FN(x, param) * y;
// }
// }
} // namespace vllm } // namespace vllm
// #define LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(KERNEL, PARAM) \
// int d = input.size(-1) / 2; \
// int64_t num_tokens = input.numel() / input.size(-1); \
// dim3 grid(num_tokens); \
// dim3 block(std::min(d, 1024)); \
// const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
// const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
// VLLM_DISPATCH_FLOATING_TYPES( \
// input.scalar_type(), "act_and_mul_kernel_with_param", [&] { \
// vllm::act_and_mul_kernel_with_param<scalar_t, KERNEL<scalar_t>> \
// <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(), \
// input.data_ptr<scalar_t>(), d, \
// PARAM); \
// });
// Launch activation and gating kernel. // Launch activation and gating kernel.
// Use ACT_FIRST (bool) indicating whether to apply the activation function // Use ACT_FIRST (bool) indicating whether to apply the activation function
// first. // first.
...@@ -163,7 +129,7 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) { ...@@ -163,7 +129,7 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
dim3 block(std::min(d, 1024)); \ dim3 block(std::min(d, 1024)); \
if (num_tokens == 0) { \ if (num_tokens == 0) { \
return; \ return; \
} } \
const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \ const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \ const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
VLLM_DISPATCH_FLOATING_TYPES( \ VLLM_DISPATCH_FLOATING_TYPES( \
...@@ -222,9 +188,3 @@ void gelu_tanh_and_mul_opt(torch::Tensor& out, // [..., d] ...@@ -222,9 +188,3 @@ void gelu_tanh_and_mul_opt(torch::Tensor& out, // [..., d]
{ {
LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel, true); LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel, true);
} }
\ No newline at end of file
// void fatrelu_and_mul_opt(torch::Tensor& out, // [..., d],
// torch::Tensor& input, // [..., 2 * d]
// double threshold) {
// LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(vllm::fatrelu_kernel, threshold);
// }
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment