"vscode:/vscode.git/clone" did not exist on "98df153abfcc443218aacfe61b3fd5abe2b88142"
Unverified commit b386bb3d, authored by Augusto Yao, committed by GitHub
Browse files

fix bugs when token_classify & classify run concurrently (#36614)


Signed-off-by: augusto.yjh <augusto.yjh@antgroup.com>
parent fe714dd5
...@@ -47,10 +47,13 @@ class AllPool(TokenPoolingMethod): ...@@ -47,10 +47,13 @@ class AllPool(TokenPoolingMethod):
pooling_metadata: PoolingMetadata, pooling_metadata: PoolingMetadata,
) -> list[TokenPoolingMethodOutputItem]: ) -> list[TokenPoolingMethodOutputItem]:
pooling_cursor = pooling_metadata.get_pooling_cursor() pooling_cursor = pooling_metadata.get_pooling_cursor()
hidden_states_all = hidden_states.split( hidden_states_lst = [
pooling_cursor.num_scheduled_tokens_cpu.tolist() hidden_states[first : last + 1]
for first, last in zip(
pooling_cursor.first_token_indices_gpu.tolist(),
pooling_cursor.last_token_indices_gpu.tolist(),
) )
hidden_states_lst = [hidden_states_all[i] for i in pooling_cursor.index] ]
if not self.enable_chunked_prefill: if not self.enable_chunked_prefill:
return hidden_states_lst return hidden_states_lst
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment