Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0dc4c8e9
Commit
0dc4c8e9
authored
Mar 19, 2025
by
zhuwenwen
Browse files
skip rms_norm_dynamic_per_token_quant
parent
d9ef7ce7
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
13 deletions
+13
-13
csrc/ops.h
csrc/ops.h
+7
-7
csrc/torch_bindings.cpp
csrc/torch_bindings.cpp
+6
-6
No files found.
csrc/ops.h
View file @
0dc4c8e9
...
@@ -68,13 +68,13 @@ void fused_add_rms_norm(torch::Tensor& input, torch::Tensor& residual,
...
@@ -68,13 +68,13 @@ void fused_add_rms_norm(torch::Tensor& input, torch::Tensor& residual,
// torch::Tensor& weight,
// torch::Tensor& weight,
// torch::Tensor& scale, double epsilon);
// torch::Tensor& scale, double epsilon);
void
rms_norm_dynamic_per_token_quant
(
torch
::
Tensor
&
out
,
//
void rms_norm_dynamic_per_token_quant(torch::Tensor& out,
torch
::
Tensor
const
&
input
,
//
torch::Tensor const& input,
torch
::
Tensor
const
&
weight
,
//
torch::Tensor const& weight,
torch
::
Tensor
&
scales
,
//
torch::Tensor& scales,
double
const
epsilon
,
//
double const epsilon,
std
::
optional
<
torch
::
Tensor
>
scale_ub
,
//
std::optional<torch::Tensor> scale_ub,
std
::
optional
<
torch
::
Tensor
>
residual
);
//
std::optional<torch::Tensor> residual);
void
rotary_embedding
(
torch
::
Tensor
&
positions
,
torch
::
Tensor
&
query
,
void
rotary_embedding
(
torch
::
Tensor
&
positions
,
torch
::
Tensor
&
query
,
torch
::
Tensor
&
key
,
int64_t
head_size
,
torch
::
Tensor
&
key
,
int64_t
head_size
,
...
...
csrc/torch_bindings.cpp
View file @
0dc4c8e9
...
@@ -142,12 +142,12 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
...
@@ -142,12 +142,12 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
// &fused_add_rms_norm_static_fp8_quant);
// &fused_add_rms_norm_static_fp8_quant);
// Fused Layernorm + Quant kernels
// Fused Layernorm + Quant kernels
ops
.
def
(
//
ops.def(
"rms_norm_dynamic_per_token_quant(Tensor! result, Tensor input, "
//
"rms_norm_dynamic_per_token_quant(Tensor! result, Tensor input, "
"Tensor weight, Tensor! scale, float epsilon, "
//
"Tensor weight, Tensor! scale, float epsilon, "
"Tensor? scale_ub, Tensor!? residual) -> ()"
);
//
"Tensor? scale_ub, Tensor!? residual) -> ()");
ops
.
impl
(
"rms_norm_dynamic_per_token_quant"
,
torch
::
kCUDA
,
//
ops.impl("rms_norm_dynamic_per_token_quant", torch::kCUDA,
&
rms_norm_dynamic_per_token_quant
);
//
&rms_norm_dynamic_per_token_quant);
// Rotary embedding
// Rotary embedding
// Apply GPT-NeoX or GPT-J style rotary embedding to query and key.
// Apply GPT-NeoX or GPT-J style rotary embedding to query and key.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment