Unverified commit 0b769992, authored by ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟, committed by GitHub
Browse files

[Bugfix]: Use float32 for base64 embedding (#7855)


Signed-off-by: Hollow Man <hollowman@opensuse.org>
parent 1856aff4
...@@ -19,7 +19,6 @@ responses = client.embeddings.create( ...@@ -19,7 +19,6 @@ responses = client.embeddings.create(
"The best thing about vLLM is that it supports many different models" "The best thing about vLLM is that it supports many different models"
], ],
model=model, model=model,
encoding_format="float",
) )
for data in responses.data: for data in responses.data:
......
...@@ -128,9 +128,18 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI, ...@@ -128,9 +128,18 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI,
for data in responses_base64.data: for data in responses_base64.data:
decoded_responses_base64_data.append( decoded_responses_base64_data.append(
np.frombuffer(base64.b64decode(data.embedding), np.frombuffer(base64.b64decode(data.embedding),
dtype="float").tolist()) dtype="float32").tolist())
assert responses_float.data[0].embedding == decoded_responses_base64_data[ assert responses_float.data[0].embedding == decoded_responses_base64_data[
0] 0]
assert responses_float.data[1].embedding == decoded_responses_base64_data[ assert responses_float.data[1].embedding == decoded_responses_base64_data[
1] 1]
# Default response is float32 decoded from base64 by OpenAI Client
responses_default = await embedding_client.embeddings.create(
input=input_texts, model=model_name)
assert responses_float.data[0].embedding == responses_default.data[
0].embedding
assert responses_float.data[1].embedding == responses_default.data[
1].embedding
...@@ -31,7 +31,9 @@ def _get_embedding( ...@@ -31,7 +31,9 @@ def _get_embedding(
if encoding_format == "float": if encoding_format == "float":
return output.embedding return output.embedding
elif encoding_format == "base64": elif encoding_format == "base64":
embedding_bytes = np.array(output.embedding).tobytes() # Force to use float32 for base64 encoding
# to match the OpenAI python client behavior
embedding_bytes = np.array(output.embedding, dtype="float32").tobytes()
return base64.b64encode(embedding_bytes).decode("utf-8") return base64.b64encode(embedding_bytes).decode("utf-8")
assert_never(encoding_format) assert_never(encoding_format)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment