Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
029da5e8
Commit
029da5e8
authored
Jul 21, 2025
by
zhuwenwen
Browse files
update List
parent
09396f62
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
8 deletions
+8
-8
vllm/model_executor/layers/quantization/utils/fp8_utils.py
vllm/model_executor/layers/quantization/utils/fp8_utils.py
+8
-8
No files found.
vllm/model_executor/layers/quantization/utils/fp8_utils.py
View file @
029da5e8
...
...
@@ -5,7 +5,7 @@
import
functools
import
json
import
os
from
typing
import
Any
,
Callable
,
Optional
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
Union
,
List
import
torch
...
...
@@ -34,7 +34,7 @@ def cutlass_scaled_mm(
B
:
torch
.
Tensor
,
As
:
torch
.
Tensor
,
Bs
:
torch
.
Tensor
,
block_size
:
l
ist
[
int
],
block_size
:
L
ist
[
int
],
output_dtype
:
torch
.
dtype
=
torch
.
float16
,
)
->
torch
.
Tensor
:
return
ops
.
cutlass_scaled_mm
(
A
,
...
...
@@ -49,7 +49,7 @@ def rocm_aiter_gemm_w8a8_blockscale_impl(
B
:
torch
.
Tensor
,
As
:
torch
.
Tensor
,
Bs
:
torch
.
Tensor
,
block_size
:
l
ist
[
int
],
block_size
:
L
ist
[
int
],
output_dtype
:
torch
.
dtype
=
torch
.
float16
,
)
->
torch
.
Tensor
:
import
aiter
as
rocm_aiter
...
...
@@ -62,7 +62,7 @@ def rocm_aiter_gemm_w8a8_blockscale_fake(
B
:
torch
.
Tensor
,
As
:
torch
.
Tensor
,
Bs
:
torch
.
Tensor
,
block_size
:
l
ist
[
int
],
block_size
:
L
ist
[
int
],
output_dtype
:
torch
.
dtype
=
torch
.
float16
,
)
->
torch
.
Tensor
:
...
...
@@ -89,7 +89,7 @@ def dispatch_w8a8_blockscale_func(
torch
.
Tensor
,
torch
.
Tensor
,
torch
.
Tensor
,
l
ist
[
int
],
L
ist
[
int
],
torch
.
dtype
,
],
torch
.
Tensor
]:
if
use_cutlass
:
...
...
@@ -117,7 +117,7 @@ def should_use_deepgemm(output_dtype: torch.dtype, weight: torch.Tensor):
def
apply_w8a8_block_fp8_linear
(
input
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
block_size
:
l
ist
[
int
],
block_size
:
L
ist
[
int
],
weight_scale
:
torch
.
Tensor
,
input_scale
:
Optional
[
torch
.
Tensor
]
=
None
,
bias
:
Optional
[
torch
.
Tensor
]
=
None
,
...
...
@@ -190,7 +190,7 @@ def apply_w8a8_block_fp8_linear(
def
apply_w8a8_block_fp8_linear_fake
(
input
:
torch
.
Tensor
,
weight
:
torch
.
Tensor
,
block_size
:
l
ist
[
int
],
block_size
:
L
ist
[
int
],
weight_scale
:
torch
.
Tensor
,
input_scale
:
Optional
[
torch
.
Tensor
]
=
None
,
bias
:
Optional
[
torch
.
Tensor
]
=
None
,
...
...
@@ -571,7 +571,7 @@ def w8a8_block_fp8_matmul(
B
:
torch
.
Tensor
,
As
:
torch
.
Tensor
,
Bs
:
torch
.
Tensor
,
block_size
:
l
ist
[
int
],
block_size
:
L
ist
[
int
],
output_dtype
:
torch
.
dtype
=
torch
.
float16
,
)
->
torch
.
Tensor
:
"""This function performs matrix multiplication with block-wise
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment