Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
54944679
Commit
54944679
authored
Feb 08, 2025
by
zhuwenwen
Browse files
update layer.py
parent
66b809cc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
7 deletions
+7
-7
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+7
-7
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
54944679
...
@@ -316,6 +316,12 @@ class FusedMoE(torch.nn.Module):
...
@@ -316,6 +316,12 @@ class FusedMoE(torch.nn.Module):
self
.
quant_method
=
quant_config
.
get_quant_method
(
self
,
prefix
)
self
.
quant_method
=
quant_config
.
get_quant_method
(
self
,
prefix
)
assert
self
.
quant_method
is
not
None
assert
self
.
quant_method
is
not
None
if
quant_config
is
None
:
# Not considering quant for now, temporarily
self
.
use_nn_moe
=
int
(
os
.
environ
.
get
(
'MOE_NN'
,
1
))
==
1
else
:
self
.
use_nn_moe
=
False
moe_quant_params
=
{
moe_quant_params
=
{
"num_experts"
:
num_experts
,
"num_experts"
:
num_experts
,
"hidden_size"
:
hidden_size
,
"hidden_size"
:
hidden_size
,
...
@@ -323,19 +329,13 @@ class FusedMoE(torch.nn.Module):
...
@@ -323,19 +329,13 @@ class FusedMoE(torch.nn.Module):
self
.
intermediate_size_per_partition
,
self
.
intermediate_size_per_partition
,
"params_dtype"
:
params_dtype
,
"params_dtype"
:
params_dtype
,
"weight_loader"
:
self
.
weight_loader
,
"weight_loader"
:
self
.
weight_loader
,
"use_nn_moe"
:
self
.
use_nn_moe
,
"use_nn_moe"
:
self
.
use_nn_moe
,
}
}
# need full intermediate size pre-sharding for WNA16 act order
# need full intermediate size pre-sharding for WNA16 act order
if
(
self
.
quant_method
.
__class__
.
__name__
==
if
(
self
.
quant_method
.
__class__
.
__name__
==
"CompressedTensorsWNA16MoEMethod"
):
"CompressedTensorsWNA16MoEMethod"
):
moe_quant_params
[
"intermediate_size_full"
]
=
intermediate_size
moe_quant_params
[
"intermediate_size_full"
]
=
intermediate_size
if
quant_config
is
None
:
# Not considering quant for now, temporarily
self
.
use_nn_moe
=
int
(
os
.
environ
.
get
(
'MOE_NN'
,
1
))
==
1
else
:
self
.
use_nn_moe
=
False
self
.
quant_method
.
create_weights
(
layer
=
self
,
**
moe_quant_params
)
self
.
quant_method
.
create_weights
(
layer
=
self
,
**
moe_quant_params
)
def
_load_per_tensor_weight_scale
(
self
,
shard_id
:
str
,
def
_load_per_tensor_weight_scale
(
self
,
shard_id
:
str
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment