Unverified Commit c8492978 authored by YAMY's avatar YAMY Committed by GitHub
Browse files

Fix Illegal Instruction/IMA errors when using DP attention --...

Fix Illegal Instruction/IMA errors when using DP attention -- num_tokens_for_logprob calculation (#12115)
parent 428710c2
...@@ -2073,15 +2073,18 @@ class Scheduler( ...@@ -2073,15 +2073,18 @@ class Scheduler(
num_tokens_for_logprob = num_tokens num_tokens_for_logprob = num_tokens
else: else:
num_tokens = local_batch.extend_num_tokens num_tokens = local_batch.extend_num_tokens
num_tokens_for_logprob = sum( if local_batch.return_logprob:
[ num_tokens_for_logprob = sum(
# We should have at least 1 token for sample in every case. # We should have at least 1 token for sample in every case.
max(extend_len - logprob_start_len, 1) max(extend_len - logprob_start_len, 1)
for logprob_start_len, extend_len in zip( for logprob_start_len, extend_len in zip(
local_batch.extend_logprob_start_lens, local_batch.extend_lens local_batch.extend_logprob_start_lens,
local_batch.extend_lens,
) )
] )
) else:
# When return_logprob = False, only need last token per request
num_tokens_for_logprob = local_batch.batch_size()
if local_batch is None or local_batch.forward_mode.is_decode_or_idle(): if local_batch is None or local_batch.forward_mode.is_decode_or_idle():
can_cuda_graph = 1 can_cuda_graph = 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment