Unverified Commit 8ea7df61 authored by kk's avatar kk Committed by GitHub
Browse files

[WA] fix output data is nan in CI test "test_moe_eval_accuracy_large.py" (#7021)


Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: HAI <hixiao@gmail.com>
parent 4a102a2b
......@@ -717,6 +717,11 @@ class AiterIndicesUpdaterPrefill:
self.req_to_token = model_runner.req_to_token_pool.req_to_token
self.update = self.update_single_wrapper
# get the last index of the pool
self.pool_size = (
model_runner.token_to_kv_pool.size + model_runner.token_to_kv_pool.page_size
) - 1
self.kv_indices = None
self.max_q_len = 0
self.max_kv_len = 0
......@@ -754,8 +759,16 @@ class AiterIndicesUpdaterPrefill:
# Normal extend
kv_indptr[1 : bs + 1] = torch.cumsum(paged_kernel_lens, dim=0)
kv_indptr = kv_indptr[: bs + 1]
kv_indices = torch.empty(
paged_kernel_lens_sum + 256,
# (TODO: Kk) WA - CI test_moe_eval_accuracy_large.py
# mha_batch_prefill reads 128 elements at a time to do its computation.
# If the real data is not long enough, the original padding value 0 is used,
# but location 0 will be made nan (noqa) in cuda graph capture mode,
# which causes the output tensor values to become nan.
# The WA is to pad with the last index of the pool, which is assured not to change.
kv_indices = torch.full(
(paged_kernel_lens_sum + 128,),
self.pool_size,
dtype=torch.int32,
device=req_pool_indices.device,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment