Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
11b94900
"vscode:/vscode.git/clone" did not exist on "e1098ced95146d98a4ed46c81ee709013d54fb1f"
Commit
11b94900
authored
Oct 12, 2025
by
zhuwenwen
Browse files
remove two_batch_overlap of moe and update use_mla
parent
6605af8e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1 addition
and
4 deletions
+1
-4
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+0
-3
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-1
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
11b94900
...
...
@@ -1200,9 +1200,6 @@ class FusedMoE(CustomOp):
self
.
quant_method
.
create_weights
(
layer
=
self
,
**
moe_quant_params
)
from
vllm.two_batch_overlap.two_batch_overlap
import
tbo_all_reduce
self
.
tbo_all_reduce
=
tbo_all_reduce
# The moe_fused_gate kernel currently requires num_experts/num_expert_group to not exceed MAX_VPT=32. Once the kernel can handle MAX_VPT > 32, we can remove this assertion.
self
.
use_fused_gate
=
envs
.
VLLM_ENABLE_MOE_FUSED_GATE
\
and
self
.
e_score_correction_bias
is
not
None
\
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
11b94900
...
...
@@ -3854,7 +3854,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
kv_cache_spec
.
page_size_bytes
)
if
isinstance
(
kv_cache_spec
,
AttentionSpec
):
has_attn
=
True
if
envs
.
VLLM_USE_FLASH_ATTN_PA
and
not
kv_cache_spec
.
use_mla
:
if
envs
.
VLLM_USE_FLASH_ATTN_PA
and
not
self
.
vllm_config
.
model_config
.
use_mla
:
key_cache_shape
,
value_cache_shape
=
attn_backend
.
get_kv_cache_shape
(
num_blocks
,
kv_cache_spec
.
block_size
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment