"tests/vscode:/vscode.git/clone" did not exist on "1f69c4a892d35a362e9ffeaf74072aeecca2286b"
Unverified commit 1c3a221d, authored by wang.yuqi, committed by GitHub
Browse files

[Bugfix] Fix corner case of sparse embedding (#33886)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 7bd42e60
...@@ -136,6 +136,16 @@ async def test_bge_m3_api_server_sparse_embedding(client: openai.AsyncOpenAI): ...@@ -136,6 +136,16 @@ async def test_bge_m3_api_server_sparse_embedding(client: openai.AsyncOpenAI):
) )
@pytest.mark.asyncio
async def test_bge_m3_api_server_sparse_embedding_corner_case(
    client: openai.AsyncOpenAI,
):
    """Check the sparse embedding returned for the one-item input ``["Hi"]``.

    Exactly one embedding must come back, and it must contain an entry for
    key 2673 whose value is within 1% (relative) of the reference weight.
    """
    # NOTE(review): 2673 is presumably the token id for "Hi" - confirm
    # against the model's tokenizer.
    expected_key = 2673
    expected_weight = 0.26710861921310425

    results = await sparse_embeddings(client, ["Hi"])
    assert len(results) == 1

    first = results[0]
    assert expected_key in first
    assert first[expected_key] == pytest.approx(expected_weight, rel=0.01)
# https://github.com/FlagOpen/FlagEmbedding/blob/6fd176266f2382878bcc69cd656cff425d52f49b/FlagEmbedding/inference/embedder/encoder_only/m3.py#L163 # https://github.com/FlagOpen/FlagEmbedding/blob/6fd176266f2382878bcc69cd656cff425d52f49b/FlagEmbedding/inference/embedder/encoder_only/m3.py#L163
def colbert_score(q_reps: torch.Tensor, p_reps: torch.Tensor) -> torch.Tensor: def colbert_score(q_reps: torch.Tensor, p_reps: torch.Tensor) -> torch.Tensor:
token_scores = torch.einsum("in,jn->ij", q_reps, p_reps) token_scores = torch.einsum("in,jn->ij", q_reps, p_reps)
......
...@@ -165,7 +165,7 @@ class BOSEOSFilter(Pooler): ...@@ -165,7 +165,7 @@ class BOSEOSFilter(Pooler):
pooled_data = pooled_data[1:] pooled_data = pooled_data[1:]
if token_ids[-1] == self.eos_token_id: if token_ids[-1] == self.eos_token_id:
pooled_data = pooled_data[:-1] pooled_data = pooled_data[:-1]
pooled_outputs[i] = pooled_data.squeeze() pooled_outputs[i] = pooled_data.squeeze(-1)
return pooled_outputs return pooled_outputs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment