Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
550a1e5e
Commit
550a1e5e
authored
Jun 12, 2025
by
zhuwenwen
Browse files
Update int8_utils.py
parent
d8b9028d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
vllm/model_executor/layers/quantization/utils/int8_utils.py
vllm/model_executor/layers/quantization/utils/int8_utils.py
+4
-4
No files found.
vllm/model_executor/layers/quantization/utils/int8_utils.py
View file @
550a1e5e
...
...
@@ -446,10 +446,10 @@ def w8a8_block_int8_matmul(
C_shape
=
A
.
shape
[:
-
1
]
+
(
N
,
)
C
=
A
.
new_empty
(
C_shape
,
dtype
=
output_dtype
)
if
len
(
W8A8_TRITONJSON
.
triton_json_
lis
t
)
==
0
:
if
len
(
W8A8_TRITONJSON
.
triton_json_
dic
t
)
==
0
:
config
=
None
elif
f
"1_
{
N
}
_
{
K
}
_block[
{
block_n
}
,
{
block_k
}
]"
in
W8A8_TRITONJSON
.
triton_json_
list
[
0
]
:
elif
f
"1_
{
N
}
_
{
K
}
_block[
{
block_n
}
,
{
block_k
}
]"
in
W8A8_TRITONJSON
.
triton_json_
dict
:
if
M
<=
16
:
m_
=
M
elif
M
<=
64
:
...
...
@@ -472,7 +472,7 @@ def w8a8_block_int8_matmul(
else
:
m_
=
8192
config
=
W8A8_TRITONJSON
.
triton_json_
list
[
0
]
[
f
"
{
m_
}
_
{
N
}
_
{
K
}
_block[
{
block_n
}
,
{
block_k
}
]"
]
config
=
W8A8_TRITONJSON
.
triton_json_
dict
[
f
"
{
m_
}
_
{
N
}
_
{
K
}
_block[
{
block_n
}
,
{
block_k
}
]"
]
else
:
config
=
None
...
...
@@ -617,4 +617,4 @@ def block_dequant(
i
*
block_k
:
min
((
i
+
1
)
*
block_k
,
k
),
]
*=
x_s
[
j
][
i
]
return
x_dq_block
return
x_dq_block
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment