Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
37771741
Commit
37771741
authored
Dec 17, 2025
by
chenyue3
Browse files
修复CompressedTensorsLinearMethod中的w4a16的冲突问题
parent
259605da
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
1 deletion
+11
-1
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
...ers/quantization/compressed_tensors/compressed_tensors.py
+10
-0
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
...on/compressed_tensors/schemes/compressed_tensors_wNa16.py
+1
-1
No files found.
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
View file @
37771741
...
@@ -723,6 +723,16 @@ class CompressedTensorsLinearMethod(LinearMethodBase):
...
@@ -723,6 +723,16 @@ class CompressedTensorsLinearMethod(LinearMethodBase):
self
.
w8a8_strategy
=
int
(
os
.
getenv
(
'W8A8_SUPPORT_METHODS'
,
'1'
))
self
.
w8a8_strategy
=
int
(
os
.
getenv
(
'W8A8_SUPPORT_METHODS'
,
'1'
))
def
process_weights_after_loading
(
self
,
layer
:
torch
.
nn
.
Module
)
->
None
:
def
process_weights_after_loading
(
self
,
layer
:
torch
.
nn
.
Module
)
->
None
:
weights_scheme
=
(
self
.
quantization_config
.
target_scheme_map
.
get
(
'Linear'
,
{})
.
get
(
'weights'
)
)
if
weights_scheme
is
not
None
:
num_bits
=
weights_scheme
.
num_bits
if
num_bits
==
4
:
return
layer
.
scheme
.
process_weights_after_loading
(
layer
)
n
=
layer
.
weight
.
shape
[
0
]
n
=
layer
.
weight
.
shape
[
0
]
k
=
layer
.
weight
.
shape
[
1
]
k
=
layer
.
weight
.
shape
[
1
]
...
...
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
View file @
37771741
...
@@ -197,5 +197,5 @@ class CompressedTensorsWNA16(CompressedTensorsScheme):
...
@@ -197,5 +197,5 @@ class CompressedTensorsWNA16(CompressedTensorsScheme):
self
.
kernel
.
process_weights_after_loading
(
layer
)
self
.
kernel
.
process_weights_after_loading
(
layer
)
def
apply_weights
(
self
,
layer
:
torch
.
nn
.
Module
,
x
:
torch
.
Tensor
,
def
apply_weights
(
self
,
layer
:
torch
.
nn
.
Module
,
x
:
torch
.
Tensor
,
bias
:
Optional
[
torch
.
Tensor
])
->
torch
.
Tensor
:
bias
:
Optional
[
torch
.
Tensor
]
,
**
kw
)
->
torch
.
Tensor
:
return
self
.
kernel
.
apply_weights
(
layer
,
x
,
bias
)
return
self
.
kernel
.
apply_weights
(
layer
,
x
,
bias
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment