Commit df03e33b authored by yangql
Browse files

取出deepep的部分调试信息

parent 29523973
......@@ -922,7 +922,7 @@ class DeepGemmDisabledFusedMoEModularKernel(torch.nn.Module):
num_ht_ll_tokens = envs.VLLM_MOE_HT_THRESHOLD
num_tokens = hidden_states.size(0)
logger.info("num_tokens=%d", num_tokens)
if num_tokens > num_ht_ll_tokens and False:
if num_tokens > num_ht_ll_tokens:
prepare_finalize = self.prepare_finalize.ht_prepare_finalize
fused_experts = self.fused_experts_ht
......
......@@ -1316,7 +1316,7 @@ class GPUModelRunnerBase(LoRAModelRunnerMixin):
spec_decode_metadata,
num_scheduled_tokens_np) = (self._prepare_inputs(scheduler_output))
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
logger.info("***********self.cudagraph_batch_sizes_max",self.cudagraph_batch_sizes[-1])
if (self.use_cuda_graph
and num_scheduled_tokens <= self.cudagraph_batch_sizes[-1]):
# Use piecewise CUDA graphs.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment