Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e485650
Commit
3e485650
authored
Jul 04, 2025
by
zhuwenwen
Browse files
增加稠密模型channelwise autotuning 接口
parent
4080ac85
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
2 additions
and
1 deletion
+2
-1
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
...ers/quantization/compressed_tensors/compressed_tensors.py
+1
-0
vllm/utils.py
vllm/utils.py
+1
-1
No files found.
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
View file @
3e485650
...
...
@@ -613,6 +613,7 @@ class CompressedTensorsLinearMethod(LinearMethodBase):
_weight
=
weight_data
.
T
.
contiguous
().
reshape
(
n
,
-
1
)
layer
.
weight
.
data
=
_weight
self
.
tritonsingleton
.
gen_model_json
()
layer
.
scheme
.
process_weights_after_loading
(
layer
)
def
create_weights
(
self
,
layer
:
torch
.
nn
.
Module
,
...
...
vllm/utils.py
View file @
3e485650
...
...
@@ -1900,7 +1900,7 @@ class W8a8GetCacheJSON:
self
.
quant_method
=
None
# Destructor; ultimately generates the model.json configuration file
def
gen_model_json
(
self
,
E
:
int
,
block_size
:
Optional
[
list
]
=
None
):
def
gen_model_json
(
self
,
E
:
Optional
[
int
]
=
0
,
block_size
:
Optional
[
list
]
=
None
):
json_dir
=
os
.
getenv
(
'LMSLIM_TUNING_JSON'
,
"None"
)
if
json_dir
is
not
"None"
and
os
.
path
.
exists
(
json_dir
):
# Generate the model configuration file
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment