"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "0f71e24034263363e48cdc6ae036e7ca057a4f44"
Unverified commit ef53395e, authored by Augusto Yao, committed by GitHub
Browse files

[bugfix] do not add extra linebreak for score/rerank with chat template (#38617)


Signed-off-by: augusto.yjh <augusto.yjh@antgroup.com>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: wang.yuqi <yuqi.wang@daocloud.io>
Co-authored-by: wang.yuqi <noooop@126.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent eb474549
...@@ -234,7 +234,7 @@ async def test_score_api_queries_str_documents_image_url_plus_text_content( ...@@ -234,7 +234,7 @@ async def test_score_api_queries_str_documents_image_url_plus_text_content(
assert score.id is not None assert score.id is not None
assert score.data is not None assert score.data is not None
assert len(score.data) == 1 assert len(score.data) == 1
assert score.usage.prompt_tokens == 108 assert score.usage.prompt_tokens == 107
assert_score( assert_score(
score.data[0].score, TEXT_VS_TEXT_PLUS_IMAGE, backend, "text_vs_text_plus_image" score.data[0].score, TEXT_VS_TEXT_PLUS_IMAGE, backend, "text_vs_text_plus_image"
) )
...@@ -264,7 +264,7 @@ async def test_score_api_queries_str_documents_list( ...@@ -264,7 +264,7 @@ async def test_score_api_queries_str_documents_list(
assert score.id is not None assert score.id is not None
assert score.data is not None assert score.data is not None
assert len(score.data) == 4 assert len(score.data) == 4
assert score.usage.prompt_tokens == 368 assert score.usage.prompt_tokens == 367
assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "list[0]_text_vs_text") assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "list[0]_text_vs_text")
assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "list[1]_text_vs_text") assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "list[1]_text_vs_text")
assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "list[2]_text_vs_image") assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "list[2]_text_vs_image")
...@@ -353,7 +353,7 @@ async def test_score_api_queries_list_documents_list( ...@@ -353,7 +353,7 @@ async def test_score_api_queries_list_documents_list(
assert score.id is not None assert score.id is not None
assert score.data is not None assert score.data is not None
assert len(score.data) == 4 assert len(score.data) == 4
assert score.usage.prompt_tokens == 368 assert score.usage.prompt_tokens == 367
assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "paired[0]_text_vs_text") assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "paired[0]_text_vs_text")
assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "paired[1]_text_vs_text") assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "paired[1]_text_vs_text")
assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "paired[2]_text_vs_image") assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "paired[2]_text_vs_image")
......
...@@ -1187,6 +1187,7 @@ def _get_full_multimodal_text_prompt( ...@@ -1187,6 +1187,7 @@ def _get_full_multimodal_text_prompt(
placeholder_storage: dict[str, list], placeholder_storage: dict[str, list],
texts: list[str], texts: list[str],
interleave_strings: bool, interleave_strings: bool,
multimodal_content_part_separator: str = "\n",
) -> str: ) -> str:
"""Combine multimodal prompts for a multimodal language model.""" """Combine multimodal prompts for a multimodal language model."""
...@@ -1232,9 +1233,11 @@ def _get_full_multimodal_text_prompt( ...@@ -1232,9 +1233,11 @@ def _get_full_multimodal_text_prompt(
# NOTE: Default behaviour: we always add missing placeholders # NOTE: Default behaviour: we always add missing placeholders
# at the front of the prompt, if interleave_strings=False # at the front of the prompt, if interleave_strings=False
if text_prompt: if text_prompt:
return "\n".join(missing_placeholders + [text_prompt]) return multimodal_content_part_separator.join(
missing_placeholders + [text_prompt]
)
else: else:
return "\n".join(missing_placeholders) return multimodal_content_part_separator.join(missing_placeholders)
# No need to validate using Pydantic again # No need to validate using Pydantic again
...@@ -1384,6 +1387,7 @@ def _parse_chat_message_content_parts( ...@@ -1384,6 +1387,7 @@ def _parse_chat_message_content_parts(
wrap_dicts: bool, wrap_dicts: bool,
interleave_strings: bool, interleave_strings: bool,
mm_processor_kwargs: dict[str, Any] | None = None, mm_processor_kwargs: dict[str, Any] | None = None,
multimodal_content_part_separator="\n",
) -> list[ConversationMessage]: ) -> list[ConversationMessage]:
content = list[_ContentPart]() content = list[_ContentPart]()
...@@ -1406,7 +1410,10 @@ def _parse_chat_message_content_parts( ...@@ -1406,7 +1410,10 @@ def _parse_chat_message_content_parts(
mm_placeholder_storage = mm_parser.mm_placeholder_storage() mm_placeholder_storage = mm_parser.mm_placeholder_storage()
if mm_placeholder_storage: if mm_placeholder_storage:
text_prompt = _get_full_multimodal_text_prompt( text_prompt = _get_full_multimodal_text_prompt(
mm_placeholder_storage, texts, interleave_strings mm_placeholder_storage,
texts,
interleave_strings,
multimodal_content_part_separator=multimodal_content_part_separator,
) )
else: else:
text_prompt = "\n".join(texts) text_prompt = "\n".join(texts)
......
...@@ -150,6 +150,7 @@ def _parse_score_content( ...@@ -150,6 +150,7 @@ def _parse_score_content(
mm_tracker=mm_tracker, mm_tracker=mm_tracker,
wrap_dicts=False, wrap_dicts=False,
interleave_strings=False, interleave_strings=False,
multimodal_content_part_separator="",
) )
if parse_res: if parse_res:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment