Unverified commit 103a468b, authored by Chenguang Zheng, committed by GitHub
Browse files

[bugfix] Missing cached item in beam search (#27874)


Signed-off-by: fake0fan <645327136@qq.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 70bfbd7b
...@@ -345,22 +345,7 @@ class OpenAIServing: ...@@ -345,22 +345,7 @@ class OpenAIServing:
if is_explicit_encoder_decoder_prompt(prompt): if is_explicit_encoder_decoder_prompt(prompt):
raise NotImplementedError raise NotImplementedError
else:
processed_inputs = processor.input_preprocessor._prompt_to_llm_inputs(
prompt
)
if processed_inputs["type"] == "embeds":
raise NotImplementedError
# This is a workaround to fix multimodal beam search; this is a
# bandaid fix for 2 small problems:
# 1. Multi_modal_data on the processed_inputs currently resolves to
# `None`.
# 2. preprocessing above expands the multimodal placeholders. However,
# this happens again in generation, so the double expansion causes
# a mismatch.
# TODO - would be ideal to handle this more gracefully.
prompt_text: str | None prompt_text: str | None
prompt_token_ids: list[int] prompt_token_ids: list[int]
multi_modal_data: MultiModalDataDict | None multi_modal_data: MultiModalDataDict | None
...@@ -373,9 +358,16 @@ class OpenAIServing: ...@@ -373,9 +358,16 @@ class OpenAIServing:
prompt_token_ids = prompt.get("prompt_token_ids", []) # type: ignore prompt_token_ids = prompt.get("prompt_token_ids", []) # type: ignore
multi_modal_data = prompt.get("multi_modal_data") # type: ignore multi_modal_data = prompt.get("multi_modal_data") # type: ignore
mm_processor_kwargs: dict[str, Any] | None = processed_inputs.get( mm_processor_kwargs: dict[str, Any] | None = None
"mm_processor_kwargs"
) # type: ignore # This is a workaround to fix multimodal beam search; this is a
# bandaid fix for 2 small problems:
# 1. Multi_modal_data on the processed_inputs currently resolves to
# `None`.
# 2. preprocessing above expands the multimodal placeholders. However,
# this happens again in generation, so the double expansion causes
# a mismatch.
# TODO - would be ideal to handle this more gracefully.
tokenized_length = len(prompt_token_ids) tokenized_length = len(prompt_token_ids)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment