Commit 1c3a221d (unverified), authored by wang.yuqi, committed by GitHub
Browse files

[Bugfix] Fix corner case of sparse embedding (#33886)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent 7bd42e60
......@@ -136,6 +136,16 @@ async def test_bge_m3_api_server_sparse_embedding(client: openai.AsyncOpenAI):
)
@pytest.mark.asyncio
async def test_bge_m3_api_server_sparse_embedding_corner_case(
    client: openai.AsyncOpenAI,
):
    """Check the sparse embedding returned for the single short input "Hi".

    Requests sparse embeddings for a one-element batch and verifies that
    exactly one result comes back, that it contains key 2673, and that the
    value stored under that key matches the expected reference weight to
    within 1% relative tolerance.
    """
    results = await sparse_embeddings(client, ["Hi"])

    # One input string must yield exactly one sparse embedding.
    assert len(results) == 1

    first = results[0]
    # NOTE(review): 2673 is presumably the token id produced for "Hi" --
    # confirm against the tokenizer.
    assert 2673 in first
    assert first[2673] == pytest.approx(0.26710861921310425, rel=0.01)
# https://github.com/FlagOpen/FlagEmbedding/blob/6fd176266f2382878bcc69cd656cff425d52f49b/FlagEmbedding/inference/embedder/encoder_only/m3.py#L163
def colbert_score(q_reps: torch.Tensor, p_reps: torch.Tensor) -> torch.Tensor:
token_scores = torch.einsum("in,jn->ij", q_reps, p_reps)
......
......@@ -165,7 +165,7 @@ class BOSEOSFilter(Pooler):
pooled_data = pooled_data[1:]
if token_ids[-1] == self.eos_token_id:
pooled_data = pooled_data[:-1]
pooled_outputs[i] = pooled_data.squeeze()
pooled_outputs[i] = pooled_data.squeeze(-1)
return pooled_outputs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment