Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
df03e33b
"vscode:/vscode.git/clone" did not exist on "a57a3044aa57bc4100e2033e23b6fa1dfb051a2e"
Commit
df03e33b
authored
Dec 23, 2025
by
yangql
Browse files
取出deepep的部分调试信息
parent
29523973
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
2 additions
and
2 deletions
+2
-2
vllm/model_executor/layers/fused_moe/modular_kernel.py
vllm/model_executor/layers/fused_moe/modular_kernel.py
+1
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-1
No files found.
vllm/model_executor/layers/fused_moe/modular_kernel.py
View file @
df03e33b
...
...
@@ -922,7 +922,7 @@ class DeepGemmDisabledFusedMoEModularKernel(torch.nn.Module):
num_ht_ll_tokens
=
envs
.
VLLM_MOE_HT_THRESHOLD
num_tokens
=
hidden_states
.
size
(
0
)
logger
.
info
(
"num_tokens=%d"
,
num_tokens
)
if
num_tokens
>
num_ht_ll_tokens
and
False
:
if
num_tokens
>
num_ht_ll_tokens
:
prepare_finalize
=
self
.
prepare_finalize
.
ht_prepare_finalize
fused_experts
=
self
.
fused_experts_ht
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
df03e33b
...
...
@@ -1316,7 +1316,7 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
spec_decode_metadata
,
num_scheduled_tokens_np
)
=
(
self
.
_prepare_inputs
(
scheduler_output
))
num_scheduled_tokens
=
scheduler_output
.
total_num_scheduled_tokens
logger
.
info
(
"***********self.cudagraph_batch_sizes_max"
,
self
.
cudagraph_batch_sizes
[
-
1
])
if
(
self
.
use_cuda_graph
and
num_scheduled_tokens
<=
self
.
cudagraph_batch_sizes
[
-
1
]):
# Use piecewise CUDA graphs.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment