Commit e87de76c authored by zhuwenwen
Browse files

fix build error

parent 4c676e3d
......@@ -51,7 +51,7 @@ set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx
# versions are derived from docker/Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.7.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.4.1")
#
# Try to find python package with an executable that exactly matches
......@@ -263,7 +263,7 @@ set(VLLM_EXT_SRC
# "csrc/quantization/fp8/common.cu"
# "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
"csrc/quantization/gguf/gguf_kernel.cu"
"csrc/quantization/activation_kernels.cu"
# "csrc/quantization/activation_kernels.cu"
"csrc/cuda_utils_kernels.cu"
"csrc/prepare_inputs/advance_step.cu"
"csrc/custom_all_reduce.cu"
......
......@@ -116,43 +116,9 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
return (T)(0.5f * f * (1.0f + ::tanhf(inner)));
}
// template <typename T>
// __device__ __forceinline__ T fatrelu_kernel(const T& x, const float threshold) {
// const float f = (float)x;
// return (T)(f > threshold ? f : 0.0f);
// }
// template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&, const float)>
// __global__ void act_and_mul_kernel_with_param(
// scalar_t* __restrict__ out, const scalar_t* __restrict__ input, const int d,
// const float param) {
// const int64_t token_idx = blockIdx.x;
// for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
// const scalar_t x = VLLM_LDG(&input[token_idx * 2 * d + idx]);
// const scalar_t y = VLLM_LDG(&input[token_idx * 2 * d + d + idx]);
// out[token_idx * d + idx] = ACT_FN(x, param) * y;
// }
// }
} // namespace vllm
// #define LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(KERNEL, PARAM) \
// int d = input.size(-1) / 2; \
// int64_t num_tokens = input.numel() / input.size(-1); \
// dim3 grid(num_tokens); \
// dim3 block(std::min(d, 1024)); \
// const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
// const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
// VLLM_DISPATCH_FLOATING_TYPES( \
// input.scalar_type(), "act_and_mul_kernel_with_param", [&] { \
// vllm::act_and_mul_kernel_with_param<scalar_t, KERNEL<scalar_t>> \
// <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(), \
// input.data_ptr<scalar_t>(), d, \
// PARAM); \
// });
// Launch activation and gating kernel.
// Use ACT_FIRST (bool) indicating whether to apply the activation function
// first.
......@@ -163,7 +129,7 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
dim3 block(std::min(d, 1024)); \
if (num_tokens == 0) { \
return; \
}
} \
const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
VLLM_DISPATCH_FLOATING_TYPES( \
......@@ -222,9 +188,3 @@ void gelu_tanh_and_mul_opt(torch::Tensor& out, // [..., d]
{
LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel, true);
}
\ No newline at end of file
// void fatrelu_and_mul_opt(torch::Tensor& out, // [..., d],
// torch::Tensor& input, // [..., 2 * d]
// double threshold) {
// LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(vllm::fatrelu_kernel, threshold);
// }
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment