Unverified commit b386bb3d, authored by Augusto Yao, committed by GitHub
Browse files

fix bugs when token_classify & classify run concurrently (#36614)


Signed-off-by: augusto.yjh <augusto.yjh@antgroup.com>
parent fe714dd5
......@@ -47,10 +47,13 @@ class AllPool(TokenPoolingMethod):
pooling_metadata: PoolingMetadata,
) -> list[TokenPoolingMethodOutputItem]:
pooling_cursor = pooling_metadata.get_pooling_cursor()
hidden_states_all = hidden_states.split(
pooling_cursor.num_scheduled_tokens_cpu.tolist()
)
hidden_states_lst = [hidden_states_all[i] for i in pooling_cursor.index]
hidden_states_lst = [
hidden_states[first : last + 1]
for first, last in zip(
pooling_cursor.first_token_indices_gpu.tolist(),
pooling_cursor.last_token_indices_gpu.tolist(),
)
]
if not self.enable_chunked_prefill:
return hidden_states_lst
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment