Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e87de76c
Commit
e87de76c
authored
Jun 21, 2025
by
zhuwenwen
Browse files
fix build error
parent
4c676e3d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
44 deletions
+4
-44
CMakeLists.txt
CMakeLists.txt
+2
-2
csrc/opt/activation_kernels_opt.cu
csrc/opt/activation_kernels_opt.cu
+2
-42
No files found.
CMakeLists.txt
View file @
e87de76c
...
@@ -51,7 +51,7 @@ set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx
...
@@ -51,7 +51,7 @@ set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx
# versions are derived from docker/Dockerfile.rocm
# versions are derived from docker/Dockerfile.rocm
#
#
set
(
TORCH_SUPPORTED_VERSION_CUDA
"2.7.0"
)
set
(
TORCH_SUPPORTED_VERSION_CUDA
"2.7.0"
)
set
(
TORCH_SUPPORTED_VERSION_ROCM
"2.
7.0
"
)
set
(
TORCH_SUPPORTED_VERSION_ROCM
"2.
4.1
"
)
#
#
# Try to find python package with an executable that exactly matches
# Try to find python package with an executable that exactly matches
...
@@ -263,7 +263,7 @@ set(VLLM_EXT_SRC
...
@@ -263,7 +263,7 @@ set(VLLM_EXT_SRC
# "csrc/quantization/fp8/common.cu"
# "csrc/quantization/fp8/common.cu"
# "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
# "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
"csrc/quantization/gguf/gguf_kernel.cu"
"csrc/quantization/gguf/gguf_kernel.cu"
"csrc/quantization/activation_kernels.cu"
#
"csrc/quantization/activation_kernels.cu"
"csrc/cuda_utils_kernels.cu"
"csrc/cuda_utils_kernels.cu"
"csrc/prepare_inputs/advance_step.cu"
"csrc/prepare_inputs/advance_step.cu"
"csrc/custom_all_reduce.cu"
"csrc/custom_all_reduce.cu"
...
...
csrc/opt/activation_kernels_opt.cu
View file @
e87de76c
...
@@ -115,44 +115,10 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
...
@@ -115,44 +115,10 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
float
inner
=
BETA
*
(
f
+
KAPPA
*
x_cube
);
float
inner
=
BETA
*
(
f
+
KAPPA
*
x_cube
);
return
(
T
)(
0.5
f
*
f
*
(
1.0
f
+
::
tanhf
(
inner
)));
return
(
T
)(
0.5
f
*
f
*
(
1.0
f
+
::
tanhf
(
inner
)));
}
}
// template <typename T>
// __device__ __forceinline__ T fatrelu_kernel(const T& x, const float threshold) {
// const float f = (float)x;
// return (T)(f > threshold ? f : 0.0f);
// }
// template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&, const float)>
// __global__ void act_and_mul_kernel_with_param(
// scalar_t* __restrict__ out, const scalar_t* __restrict__ input, const int d,
// const float param) {
// const int64_t token_idx = blockIdx.x;
// for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
// const scalar_t x = VLLM_LDG(&input[token_idx * 2 * d + idx]);
// const scalar_t y = VLLM_LDG(&input[token_idx * 2 * d + d + idx]);
// out[token_idx * d + idx] = ACT_FN(x, param) * y;
// }
// }
}
// namespace vllm
}
// namespace vllm
// #define LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(KERNEL, PARAM) \
// int d = input.size(-1) / 2; \
// int64_t num_tokens = input.numel() / input.size(-1); \
// dim3 grid(num_tokens); \
// dim3 block(std::min(d, 1024)); \
// const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
// const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
// VLLM_DISPATCH_FLOATING_TYPES( \
// input.scalar_type(), "act_and_mul_kernel_with_param", [&] { \
// vllm::act_and_mul_kernel_with_param<scalar_t, KERNEL<scalar_t>> \
// <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(), \
// input.data_ptr<scalar_t>(), d, \
// PARAM); \
// });
// Launch activation and gating kernel.
// Launch activation and gating kernel.
// Use ACT_FIRST (bool) indicating whether to apply the activation function
// Use ACT_FIRST (bool) indicating whether to apply the activation function
// first.
// first.
...
@@ -163,7 +129,7 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
...
@@ -163,7 +129,7 @@ __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
dim3 block(std::min(d, 1024)); \
dim3 block(std::min(d, 1024)); \
if (num_tokens == 0) { \
if (num_tokens == 0) { \
return; \
return; \
}
}
\
const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
VLLM_DISPATCH_FLOATING_TYPES( \
VLLM_DISPATCH_FLOATING_TYPES( \
...
@@ -221,10 +187,4 @@ void gelu_tanh_and_mul_opt(torch::Tensor& out, // [..., d]
...
@@ -221,10 +187,4 @@ void gelu_tanh_and_mul_opt(torch::Tensor& out, // [..., d]
torch
::
Tensor
&
input
)
// [..., 2 * d]
torch
::
Tensor
&
input
)
// [..., 2 * d]
{
{
LAUNCH_ACTIVATION_GATE_KERNEL
(
vllm
::
gelu_tanh_kernel
,
true
);
LAUNCH_ACTIVATION_GATE_KERNEL
(
vllm
::
gelu_tanh_kernel
,
true
);
}
}
\ No newline at end of file
// void fatrelu_and_mul_opt(torch::Tensor& out, // [..., d],
// torch::Tensor& input, // [..., 2 * d]
// double threshold) {
// LAUNCH_ACTIVATION_GATE_KERNEL_WITH_PARAM(vllm::fatrelu_kernel, threshold);
// }
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment