Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
09396f62
Commit
09396f62
authored
Jul 21, 2025
by
zhuwenwen
Browse files
update layer.py
parent
3d062a1c
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
10 deletions
+11
-10
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+11
-10
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
09396f62
...
@@ -28,8 +28,8 @@ from vllm.model_executor.layers.fused_moe.config import (
...
@@ -28,8 +28,8 @@ from vllm.model_executor.layers.fused_moe.config import (
from
vllm.model_executor.layers.fused_moe.modular_kernel
import
(
from
vllm.model_executor.layers.fused_moe.modular_kernel
import
(
FusedMoEActivationFormat
,
FusedMoEModularKernel
,
FusedMoEActivationFormat
,
FusedMoEModularKernel
,
FusedMoEPermuteExpertsUnpermute
,
FusedMoEPrepareAndFinalize
)
FusedMoEPermuteExpertsUnpermute
,
FusedMoEPrepareAndFinalize
)
from
vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe
import
(
#
from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
is_rocm_aiter_moe_enabled
)
#
is_rocm_aiter_moe_enabled)
from
vllm.model_executor.layers.quantization.base_config
import
(
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
,
QuantizeMethodBase
)
QuantizationConfig
,
QuantizeMethodBase
)
from
vllm.model_executor.utils
import
set_weight_attrs
from
vllm.model_executor.utils
import
set_weight_attrs
...
@@ -228,7 +228,8 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -228,7 +228,8 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
self
.
topk_indices_dtype
=
None
self
.
topk_indices_dtype
=
None
self
.
moe
=
moe
self
.
moe
=
moe
self
.
rocm_aiter_moe_enabled
=
is_rocm_aiter_moe_enabled
()
# self.rocm_aiter_moe_enabled = is_rocm_aiter_moe_enabled()
self
.
rocm_aiter_moe_enabled
=
False
if
self
.
rocm_aiter_moe_enabled
:
if
self
.
rocm_aiter_moe_enabled
:
from
.rocm_aiter_fused_moe
import
rocm_aiter_fused_experts
from
.rocm_aiter_fused_moe
import
rocm_aiter_fused_experts
self
.
rocm_aiter_fused_experts
=
rocm_aiter_fused_experts
self
.
rocm_aiter_fused_experts
=
rocm_aiter_fused_experts
...
@@ -309,15 +310,15 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -309,15 +310,15 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
layer
.
w13_weight
.
data
=
self
.
_maybe_pad_weight
(
layer
.
w13_weight
.
data
)
layer
.
w13_weight
.
data
=
self
.
_maybe_pad_weight
(
layer
.
w13_weight
.
data
)
layer
.
w2_weight
.
data
=
self
.
_maybe_pad_weight
(
layer
.
w2_weight
.
data
)
layer
.
w2_weight
.
data
=
self
.
_maybe_pad_weight
(
layer
.
w2_weight
.
data
)
# Lazy import to avoid importing triton.
# Lazy import to avoid importing triton.
from
vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe
import
(
#
from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
shuffle_weights
)
#
shuffle_weights)
if
self
.
rocm_aiter_moe_enabled
:
#
if self.rocm_aiter_moe_enabled:
shuffled_w13
,
shuffled_w2
=
shuffle_weights
(
#
shuffled_w13, shuffled_w2 = shuffle_weights(
layer
.
w13_weight
.
data
,
layer
.
w2_weight
.
data
)
#
layer.w13_weight.data, layer.w2_weight.data)
layer
.
w13_weight
.
data
=
shuffled_w13
#
layer.w13_weight.data = shuffled_w13
layer
.
w2_weight
.
data
=
shuffled_w2
#
layer.w2_weight.data = shuffled_w2
if
current_platform
.
is_cpu
():
if
current_platform
.
is_cpu
():
if
current_platform
.
get_cpu_architecture
()
==
CpuArchEnum
.
X86
:
if
current_platform
.
get_cpu_architecture
()
==
CpuArchEnum
.
X86
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment