Unverified commit c88860d7, authored by wang.yuqi and committed by GitHub
Browse files

[Frontend] Score entrypoint support data_1 & data_2 and queries & documents as inputs (#32577)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 758df5af
......@@ -694,7 +694,7 @@ Example template file: [examples/pooling/score/template/nemotron-rerank.jinja](.
#### Single inference
You can pass a string to both `text_1` and `text_2`, forming a single sentence pair.
You can pass a string to both `queries` and `documents`, forming a single sentence pair.
```bash
curl -X 'POST' \
......@@ -704,8 +704,8 @@ curl -X 'POST' \
-d '{
"model": "BAAI/bge-reranker-v2-m3",
"encoding_format": "float",
"text_1": "What is the capital of France?",
"text_2": "The capital of France is Paris."
"queries": "What is the capital of France?",
"documents": "The capital of France is Paris."
}'
```
......@@ -730,9 +730,9 @@ curl -X 'POST' \
#### Batch inference
You can pass a string to `text_1` and a list to `text_2`, forming multiple sentence pairs
where each pair is built from `text_1` and a string in `text_2`.
The total number of pairs is `len(text_2)`.
You can pass a string to `queries` and a list to `documents`, forming multiple sentence pairs
where each pair is built from `queries` and a string in `documents`.
The total number of pairs is `len(documents)`.
??? console "Request"
......@@ -743,8 +743,8 @@ The total number of pairs is `len(text_2)`.
-H 'Content-Type: application/json' \
-d '{
"model": "BAAI/bge-reranker-v2-m3",
"text_1": "What is the capital of France?",
"text_2": [
"queries": "What is the capital of France?",
"documents": [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris."
]
......@@ -775,9 +775,9 @@ The total number of pairs is `len(text_2)`.
}
```
You can pass a list to both `text_1` and `text_2`, forming multiple sentence pairs
where each pair is built from a string in `text_1` and the corresponding string in `text_2` (similar to `zip()`).
The total number of pairs is `len(text_2)`.
You can pass a list to both `queries` and `documents`, forming multiple sentence pairs
where each pair is built from a string in `queries` and the corresponding string in `documents` (similar to `zip()`).
The total number of pairs is `len(documents)`.
??? console "Request"
......@@ -789,11 +789,11 @@ The total number of pairs is `len(text_2)`.
-d '{
"model": "BAAI/bge-reranker-v2-m3",
"encoding_format": "float",
"text_1": [
"queries": [
"What is the capital of Brazil?",
"What is the capital of France?"
],
"text_2": [
"documents": [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris."
]
......@@ -847,8 +847,8 @@ You can pass multi-modal inputs to scoring models by passing `content` including
"http://localhost:8000/v1/score",
json={
"model": "jinaai/jina-reranker-m0",
"text_1": "slm markdown",
"text_2": {
"queries": "slm markdown",
"documents": {
"content": [
{
"type": "image_url",
......
......@@ -21,8 +21,8 @@ def parse_args():
def main(args: Namespace):
# Sample prompts.
text_1 = "What is the capital of France?"
texts_2 = [
query = "What is the capital of France?"
documents = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
......@@ -32,13 +32,13 @@ def main(args: Namespace):
llm = LLM(**vars(args))
# Generate scores. The output is a list of ScoringRequestOutputs.
outputs = llm.score(text_1, texts_2)
outputs = llm.score(query, documents)
# Print the outputs.
print("\nGenerated Outputs:\n" + "-" * 60)
for text_2, output in zip(texts_2, outputs):
for document, output in zip(documents, outputs):
score = output.outputs.score
print(f"Pair: {[text_1, text_2]!r} \nScore: {score}")
print(f"Pair: {[query, document]!r} \nScore: {score}")
print("-" * 60)
......
......@@ -255,8 +255,8 @@ cat results.jsonl
Add score requests to your batch file. The following is an example:
```text
{"custom_id": "request-1", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-1", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "queries": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "queries": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
```
You can mix chat completion, embedding, and score requests in the batch file, as long as the model you are using supports them all (note that all requests must use the same model).
......
......@@ -50,8 +50,8 @@ documents = [
# Request payload for the score API
data = {
"model": "Qwen/Qwen3-Reranker-0.6B",
"text_1": queries,
"text_2": documents,
"queries": queries,
"documents": documents,
}
......
......@@ -30,29 +30,35 @@ def main(args):
api_url = f"http://{args.host}:{args.port}/score"
model_name = args.model
text_1 = "What is the capital of Brazil?"
text_2 = "The capital of Brazil is Brasilia."
prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
queries = "What is the capital of Brazil?"
documents = "The capital of Brazil is Brasilia."
prompt = {"model": model_name, "queries": queries, "documents": documents}
score_response = post_http_request(prompt=prompt, api_url=api_url)
print("\nPrompt when text_1 and text_2 are both strings:")
print("\nPrompt when queries and documents are both strings:")
pprint.pprint(prompt)
print("\nScore Response:")
pprint.pprint(score_response.json())
text_1 = "What is the capital of France?"
text_2 = ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]
prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
queries = "What is the capital of France?"
documents = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
prompt = {"model": model_name, "queries": queries, "documents": documents}
score_response = post_http_request(prompt=prompt, api_url=api_url)
print("\nPrompt when text_1 is string and text_2 is a list:")
print("\nPrompt when queries is string and documents is a list:")
pprint.pprint(prompt)
print("\nScore Response:")
pprint.pprint(score_response.json())
text_1 = ["What is the capital of Brazil?", "What is the capital of France?"]
text_2 = ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]
prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
queries = ["What is the capital of Brazil?", "What is the capital of France?"]
documents = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
prompt = {"model": model_name, "queries": queries, "documents": documents}
score_response = post_http_request(prompt=prompt, api_url=api_url)
print("\nPrompt when text_1 and text_2 are both lists:")
print("\nPrompt when queries and documents are both lists:")
pprint.pprint(prompt)
print("\nScore Response:")
pprint.pprint(score_response.json())
......
......@@ -18,10 +18,22 @@ e.g.
"""
import argparse
import base64
import json
import requests
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
    """Download an image and return it as a base64 ``data:`` URL.

    Args:
        content_url: HTTP(S) location of the image to embed.

    Returns:
        A mapping with a single ``"url"`` key whose value has the form
        ``data:<media type>;base64,<payload>``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    import mimetypes

    # The request reuses the module-level ``headers`` mapping. A timeout is
    # set so an unresponsive host cannot block the script forever.
    with requests.get(content_url, headers=headers, timeout=30) as response:
        response.raise_for_status()
        payload = base64.b64encode(response.content).decode("utf-8")
        declared_type = response.headers.get("Content-Type", "")

    # Label the payload with its actual media type instead of assuming JPEG:
    # prefer what the server declared, then the URL's file extension, and
    # only then fall back to the previous hard-coded default.
    mime_type = declared_type.split(";", 1)[0].strip().lower()
    if not mime_type.startswith("image/"):
        mime_type = mimetypes.guess_type(content_url)[0] or ""
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    return {"url": f"data:{mime_type};base64,{payload}"}
headers = {"accept": "application/json", "Content-Type": "application/json"}
query = "A woman playing with her dog on a beach at sunset."
......@@ -30,8 +42,8 @@ documents = {
{
"type": "text",
"text": (
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, " # noqa: E501
"as the dog offers its paw in a heartwarming display of companionship and trust." # noqa: E501
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "
"as the dog offers its paw in a heartwarming display of companionship and trust."
),
},
{
......@@ -40,6 +52,12 @@ documents = {
"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
},
},
{
"type": "image_url",
"image_url": encode_base64_content_from_url(
"https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
),
},
]
}
......
......@@ -17,15 +17,27 @@ e.g.
"""
import argparse
import base64
import json
import pprint
import requests
def encode_base64_content_from_url(content_url: str) -> dict[str, str]:
    """Download an image and return it as a base64 ``data:`` URL.

    Args:
        content_url: HTTP(S) location of the image to embed.

    Returns:
        A mapping with a single ``"url"`` key whose value has the form
        ``data:<media type>;base64,<payload>``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    import mimetypes

    # The request reuses the module-level ``headers`` mapping. A timeout is
    # set so an unresponsive host cannot block the script forever.
    with requests.get(content_url, headers=headers, timeout=30) as response:
        response.raise_for_status()
        payload = base64.b64encode(response.content).decode("utf-8")
        declared_type = response.headers.get("Content-Type", "")

    # This script embeds a ``.png`` file, so a hard-coded ``image/jpeg``
    # label would be wrong. Prefer the media type the server declared, then
    # the URL's file extension, and only then the previous JPEG default.
    mime_type = declared_type.split(";", 1)[0].strip().lower()
    if not mime_type.startswith("image/"):
        mime_type = mimetypes.guess_type(content_url)[0] or ""
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    return {"url": f"data:{mime_type};base64,{payload}"}
headers = {"accept": "application/json", "Content-Type": "application/json"}
text_1 = "slm markdown"
text_2 = {
queries = "slm markdown"
documents = {
"content": [
{
"type": "image_url",
......@@ -39,6 +51,12 @@ text_2 = {
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
},
},
{
"type": "image_url",
"image_url": encode_base64_content_from_url(
"https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
),
},
]
}
......@@ -58,9 +76,9 @@ def main(args):
response = requests.get(models_url, headers=headers)
model = response.json()["data"][0]["id"]
prompt = {"model": model, "text_1": text_1, "text_2": text_2}
prompt = {"model": model, "queries": queries, "documents": documents}
response = requests.post(score_url, headers=headers, json=prompt)
print("\nPrompt when text_1 is string and text_2 is a image list:")
print("\nPrompt when queries is string and documents is a image list:")
pprint.pprint(prompt)
print("\nScore Response:")
print(json.dumps(response.json(), indent=2))
......
......@@ -32,8 +32,8 @@ INPUT_EMBEDDING_BATCH = (
'{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}'
)
INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "queries": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "queries": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
INPUT_RERANK_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
......
......@@ -251,8 +251,8 @@ async def test_score(server: RemoteOpenAIServer, model_name: str):
server.url_for("score"),
json={
"model": model_name,
"text_1": "ping",
"text_2": "pong",
"queries": "ping",
"documents": "pong",
},
)
assert response.json()["error"]["type"] == "BadRequestError"
......
......@@ -43,12 +43,12 @@ def llm():
def test_pooling_params(llm: LLM):
def get_outputs(use_activation):
text_1 = "What is the capital of France?"
text_2 = "The capital of France is Paris."
queries = "What is the capital of France?"
documents = "The capital of France is Paris."
outputs = llm.score(
text_1,
text_2,
queries,
documents,
pooling_params=PoolingParams(use_activation=use_activation),
use_tqdm=False,
)
......
......@@ -61,14 +61,40 @@ def runner(model: dict[str, Any], hf_runner):
class TestModel:
def test_text_1_str_text_2_list(
def test_queries_str_documents_str(
    self, server: RemoteOpenAIServer, model: dict[str, Any], runner
):
    """Score one query string against one document string.

    Posts a ``queries``/``documents`` payload to the score endpoint, expects
    exactly one result, and checks it against the ``run_transformers``
    reference score within a 1% relative tolerance.
    """
    query_text = "What is the capital of France?"
    document_text = "The capital of France is Paris."

    endpoint = server.url_for("score")
    payload = {
        "model": model["name"],
        "queries": query_text,
        "documents": document_text,
    }
    raw_response = requests.post(endpoint, json=payload)
    raw_response.raise_for_status()
    parsed = ScoreResponse.model_validate(raw_response.json())

    assert parsed.id is not None
    assert parsed.data is not None
    assert len(parsed.data) == 1

    vllm_scores = [item.score for item in parsed.data]

    # A single (query, document) pair is the reference input.
    reference_pairs = [[query_text, document_text]]
    reference_scores = run_transformers(runner, model, reference_pairs)

    for idx, vllm_score in enumerate(vllm_scores):
        assert reference_scores[idx] == pytest.approx(vllm_score, rel=0.01)
def test_text_1_str_text_2_str(
self, server: RemoteOpenAIServer, model: dict[str, Any], runner
):
text_1 = "What is the capital of France?"
text_2 = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
text_2 = "The capital of France is Paris."
score_response = requests.post(
server.url_for("score"),
......@@ -83,24 +109,50 @@ class TestModel:
assert score.id is not None
assert score.data is not None
assert len(score.data) == 2
assert len(score.data) == 1
vllm_outputs = [d.score for d in score.data]
text_pairs = [[text_1, text_2[0]], [text_1, text_2[1]]]
text_pairs = [[text_1, text_2]]
hf_outputs = run_transformers(runner, model, text_pairs)
for i in range(len(vllm_outputs)):
assert hf_outputs[i] == pytest.approx(vllm_outputs[i], rel=0.01)
def test_text_1_list_text_2_list(
def test_data_1_str_data_2_str(
self, server: RemoteOpenAIServer, model: dict[str, Any], runner
):
text_1 = [
"What is the capital of the United States?",
"What is the capital of France?",
]
text_2 = [
data_1 = "What is the capital of France?"
data_2 = "The capital of France is Paris."
score_response = requests.post(
server.url_for("score"),
json={
"model": model["name"],
"data_1": data_1,
"data_2": data_2,
},
)
score_response.raise_for_status()
score = ScoreResponse.model_validate(score_response.json())
assert score.id is not None
assert score.data is not None
assert len(score.data) == 1
vllm_outputs = [d.score for d in score.data]
text_pairs = [[data_1, data_2]]
hf_outputs = run_transformers(runner, model, text_pairs)
for i in range(len(vllm_outputs)):
assert hf_outputs[i] == pytest.approx(vllm_outputs[i], rel=0.01)
def test_queries_str_documents_list(
self, server: RemoteOpenAIServer, model: dict[str, Any], runner
):
queries = "What is the capital of France?"
documents = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
......@@ -109,8 +161,8 @@ class TestModel:
server.url_for("score"),
json={
"model": model["name"],
"text_1": text_1,
"text_2": text_2,
"queries": queries,
"documents": documents,
},
)
score_response.raise_for_status()
......@@ -122,24 +174,30 @@ class TestModel:
vllm_outputs = [d.score for d in score.data]
text_pairs = [[text_1[0], text_2[0]], [text_1[1], text_2[1]]]
text_pairs = [[queries, documents[0]], [queries, documents[1]]]
hf_outputs = run_transformers(runner, model, text_pairs)
for i in range(len(vllm_outputs)):
assert hf_outputs[i] == pytest.approx(vllm_outputs[i], rel=0.01)
def test_text_1_str_text_2_str(
def test_queries_list_documents_list(
self, server: RemoteOpenAIServer, model: dict[str, Any], runner
):
text_1 = "What is the capital of France?"
text_2 = "The capital of France is Paris."
queries = [
"What is the capital of the United States?",
"What is the capital of France?",
]
documents = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
score_response = requests.post(
server.url_for("score"),
json={
"model": model["name"],
"text_1": text_1,
"text_2": text_2,
"queries": queries,
"documents": documents,
},
)
score_response.raise_for_status()
......@@ -147,11 +205,11 @@ class TestModel:
assert score.id is not None
assert score.data is not None
assert len(score.data) == 1
assert len(score.data) == 2
vllm_outputs = [d.score for d in score.data]
text_pairs = [[text_1, text_2]]
text_pairs = [[queries[0], documents[0]], [queries[1], documents[1]]]
hf_outputs = run_transformers(runner, model, text_pairs)
for i in range(len(vllm_outputs)):
......@@ -160,8 +218,8 @@ class TestModel:
def test_score_max_model_len(
self, server: RemoteOpenAIServer, model: dict[str, Any]
):
text_1 = "What is the capital of France?" * 20
text_2 = [
queries = "What is the capital of France?" * 20
documents = [
"The capital of Brazil is Brasilia.",
"The capital of France is Paris.",
]
......@@ -170,8 +228,8 @@ class TestModel:
server.url_for("score"),
json={
"model": model["name"],
"text_1": text_1,
"text_2": text_2,
"queries": queries,
"documents": documents,
},
)
assert score_response.status_code == 400
......@@ -183,8 +241,8 @@ class TestModel:
server.url_for("score"),
json={
"model": model["name"],
"text_1": text_1,
"text_2": text_2,
"queries": queries,
"documents": documents,
"truncate_prompt_tokens": 101,
},
)
......@@ -192,13 +250,13 @@ class TestModel:
assert "Please, select a smaller truncation size." in score_response.text
def test_invocations(self, server: RemoteOpenAIServer, model: dict[str, Any]):
text_1 = "What is the capital of France?"
text_2 = "The capital of France is Paris."
queries = "What is the capital of France?"
documents = "The capital of France is Paris."
request_args = {
"model": model["name"],
"text_1": text_1,
"text_2": text_2,
"queries": queries,
"documents": documents,
}
score_response = requests.post(server.url_for("score"), json=request_args)
......@@ -225,14 +283,14 @@ class TestModel:
def test_use_activation(self, server: RemoteOpenAIServer, model: dict[str, Any]):
def get_outputs(use_activation):
text_1 = "What is the capital of France?"
text_2 = "The capital of France is Paris."
queries = "What is the capital of France?"
documents = "The capital of France is Paris."
response = requests.post(
server.url_for("score"),
json={
"model": model["name"],
"text_1": text_1,
"text_2": text_2,
"queries": queries,
"documents": documents,
"use_activation": use_activation,
},
)
......
......@@ -117,8 +117,8 @@ class ScoreClientMtebEncoder(MtebCrossEncoderMixin):
self.url,
json={
"model": self.model_name,
"text_1": query,
"text_2": corpus,
"queries": query,
"documents": corpus,
"truncate_prompt_tokens": -1,
},
).json()
......
......@@ -84,8 +84,11 @@ from vllm.entrypoints.pooling.pooling.protocol import (
)
from vllm.entrypoints.pooling.score.protocol import (
RerankRequest,
ScoreDataRequest,
ScoreQueriesDocumentsRequest,
ScoreRequest,
ScoreResponse,
ScoreTextRequest,
)
from vllm.entrypoints.renderer import BaseRenderer, CompletionRenderer, RenderConfig
from vllm.entrypoints.serve.disagg.protocol import GenerateRequest, GenerateResponse
......@@ -1032,7 +1035,9 @@ class OpenAIServing:
(
EmbeddingChatRequest,
EmbeddingCompletionRequest,
ScoreRequest,
ScoreDataRequest,
ScoreTextRequest,
ScoreQueriesDocumentsRequest,
RerankRequest,
ClassificationCompletionRequest,
ClassificationChatRequest,
......@@ -1042,7 +1047,9 @@ class OpenAIServing:
# since these requests don't generate tokens.
if token_num > self.max_model_len:
operations: dict[type[AnyRequest], str] = {
ScoreRequest: "score",
ScoreDataRequest: "score",
ScoreTextRequest: "score",
ScoreQueriesDocumentsRequest: "score",
ClassificationCompletionRequest: "classification",
ClassificationChatRequest: "classification",
}
......
......@@ -85,7 +85,7 @@ class BatchRequestInput(OpenAIBaseModel):
if url == "/v1/embeddings":
return TypeAdapter(EmbeddingRequest).validate_python(value)
if url.endswith("/score"):
return ScoreRequest.model_validate(value)
return TypeAdapter(ScoreRequest).validate_python(value)
if url.endswith("/rerank"):
return RerankRequest.model_validate(value)
return TypeAdapter(BatchRequestInputBody).validate_python(value)
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import time
from typing import Any
from typing import Any, TypeAlias
from pydantic import (
BaseModel,
......@@ -19,10 +19,7 @@ from vllm.entrypoints.pooling.score.utils import (
from vllm.utils import random_uuid
class ScoreRequest(PoolingBasicRequestMixin):
text_1: list[str] | str | ScoreMultiModalParam
text_2: list[str] | str | ScoreMultiModalParam
class ScoreRequestMixin(PoolingBasicRequestMixin):
# --8<-- [start:score-extra-params]
mm_processor_kwargs: dict[str, Any] | None = Field(
default=None,
......@@ -53,6 +50,42 @@ class ScoreRequest(PoolingBasicRequestMixin):
)
class ScoreDataRequest(ScoreRequestMixin):
    # Score request whose two inputs arrive under the generic
    # `data_1` / `data_2` field names. Each input may be a single string,
    # a list of strings, or a ScoreMultiModalParam.
    data_1: list[str] | str | ScoreMultiModalParam
    data_2: list[str] | str | ScoreMultiModalParam
class ScoreQueriesDocumentsRequest(ScoreRequestMixin):
    # Score request whose two inputs arrive under the `queries` /
    # `documents` field names. Each input may be a single string, a list of
    # strings, or a ScoreMultiModalParam.
    queries: list[str] | str | ScoreMultiModalParam
    documents: list[str] | str | ScoreMultiModalParam

    @property
    def data_1(self) -> list[str] | str | ScoreMultiModalParam:
        """Return `queries` under the generic `data_1` name."""
        return self.queries

    @property
    def data_2(self) -> list[str] | str | ScoreMultiModalParam:
        """Return `documents` under the generic `data_2` name."""
        return self.documents
class ScoreTextRequest(ScoreRequestMixin):
    # Score request whose two inputs arrive under the `text_1` / `text_2`
    # field names. Each input may be a single string, a list of strings, or
    # a ScoreMultiModalParam.
    text_1: list[str] | str | ScoreMultiModalParam
    text_2: list[str] | str | ScoreMultiModalParam

    @property
    def data_1(self) -> list[str] | str | ScoreMultiModalParam:
        """Return `text_1` under the generic `data_1` name."""
        return self.text_1

    @property
    def data_2(self) -> list[str] | str | ScoreMultiModalParam:
        """Return `text_2` under the generic `data_2` name."""
        return self.text_2
# Union of the three accepted score payload spellings. All three expose
# `data_1` / `data_2`, either as fields or as read-only properties.
# NOTE: this is a union alias, not a model class, so it has no
# `model_validate`; validate raw payloads via `pydantic.TypeAdapter`.
ScoreRequest: TypeAlias = (
    ScoreQueriesDocumentsRequest | ScoreDataRequest | ScoreTextRequest
)
class RerankRequest(PoolingBasicRequestMixin):
query: str | ScoreMultiModalParam
documents: list[str] | ScoreMultiModalParam
......
......@@ -66,15 +66,15 @@ class ServingScores(OpenAIServing):
async def _embedding_score(
self,
tokenizer: TokenizerLike,
texts_1: list[str],
texts_2: list[str],
data_1: list[str],
data_2: list[str],
request: RerankRequest | ScoreRequest,
request_id: str,
tokenization_kwargs: dict[str, Any] | None = None,
lora_request: LoRARequest | None | None = None,
trace_headers: Mapping[str, str] | None = None,
) -> list[PoolingRequestOutput] | ErrorResponse:
input_texts = texts_1 + texts_2
input_texts = data_1 + data_2
engine_prompts: list[TokensPrompt] = []
tokenize_async = make_async(
......@@ -135,22 +135,22 @@ class ServingScores(OpenAIServing):
async for i, res in result_generator:
embeddings[i] = res
emb_texts_1: list[PoolingRequestOutput] = []
emb_texts_2: list[PoolingRequestOutput] = []
emb_data_1: list[PoolingRequestOutput] = []
emb_data_2: list[PoolingRequestOutput] = []
for i in range(0, len(texts_1)):
for i in range(0, len(data_1)):
assert (emb := embeddings[i]) is not None
emb_texts_1.append(emb)
emb_data_1.append(emb)
for i in range(len(texts_1), len(embeddings)):
for i in range(len(data_1), len(embeddings)):
assert (emb := embeddings[i]) is not None
emb_texts_2.append(emb)
emb_data_2.append(emb)
if len(emb_texts_1) == 1:
emb_texts_1 = emb_texts_1 * len(emb_texts_2)
if len(emb_data_1) == 1:
emb_data_1 = emb_data_1 * len(emb_data_2)
final_res_batch = _cosine_similarity(
tokenizer=tokenizer, embed_1=emb_texts_1, embed_2=emb_texts_2
tokenizer=tokenizer, embed_1=emb_data_1, embed_2=emb_data_2
)
return final_res_batch
......@@ -333,8 +333,8 @@ class ServingScores(OpenAIServing):
else:
return await self._embedding_score(
tokenizer=tokenizer,
texts_1=data_1, # type: ignore[arg-type]
texts_2=data_2, # type: ignore[arg-type]
data_1=data_1, # type: ignore[arg-type]
data_2=data_2, # type: ignore[arg-type]
request=request,
request_id=request_id,
tokenization_kwargs=tokenization_kwargs,
......@@ -361,8 +361,8 @@ class ServingScores(OpenAIServing):
try:
final_res_batch = await self._run_scoring(
request.text_1,
request.text_2,
request.data_1,
request.data_2,
request,
request_id,
raw_request,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment