Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f262a62a
Unverified
Commit
f262a62a
authored
Mar 25, 2026
by
Andreas Karatzas
Committed by
GitHub
Mar 25, 2026
Browse files
[ROCm][CI] Fix flaky Cohere/OpenAI embedding parity test (#37616)
Signed-off-by:
Andreas Karatzas
<
akaratza@amd.com
>
parent
9ac2fcaf
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
4 deletions
+8
-4
tests/entrypoints/pooling/embed/test_cohere_openai_parity.py
tests/entrypoints/pooling/embed/test_cohere_openai_parity.py
+2
-2
tests/entrypoints/pooling/embed/test_online_dimensions.py
tests/entrypoints/pooling/embed/test_online_dimensions.py
+2
-2
vllm/entrypoints/pooling/base/serving.py
vllm/entrypoints/pooling/base/serving.py
+1
-0
vllm/entrypoints/utils.py
vllm/entrypoints/utils.py
+3
-0
No files found.
tests/entrypoints/pooling/embed/test_cohere_openai_parity.py
View file @
f262a62a
...
...
@@ -10,7 +10,7 @@ import numpy as np
import
pytest
import
requests
from
tests.utils
import
RemoteOpenAIServer
from
tests.utils
import
ROCM_EXTRA_ARGS
,
RemoteOpenAIServer
MODEL_NAME
=
"BAAI/bge-base-en-v1.5"
DTYPE
=
"bfloat16"
...
...
@@ -28,7 +28,7 @@ def server():
"512"
,
"--gpu-memory-utilization"
,
"0.02"
,
]
]
+
ROCM_EXTRA_ARGS
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
yield
remote_server
...
...
tests/entrypoints/pooling/embed/test_online_dimensions.py
View file @
f262a62a
...
...
@@ -10,7 +10,7 @@ import pytest
from
tests.conftest
import
HfRunner
from
tests.models.language.pooling.embed_utils
import
run_embedding_correctness_test
from
tests.models.utils
import
EmbedModelInfo
from
tests.utils
import
RemoteOpenAIServer
from
tests.utils
import
ROCM_EXTRA_ARGS
,
RemoteOpenAIServer
from
vllm.entrypoints.pooling.embed.protocol
import
EmbeddingResponse
from
vllm.platforms
import
current_platform
...
...
@@ -49,7 +49,7 @@ def server(model_info, dtype: str):
"--enforce-eager"
,
"--max-model-len"
,
"512"
,
]
]
+
ROCM_EXTRA_ARGS
if
model_info
.
name
==
"Snowflake/snowflake-arctic-embed-m-v1.5"
:
# Manually enable Matryoshka Embeddings
...
...
vllm/entrypoints/pooling/base/serving.py
View file @
f262a62a
...
...
@@ -118,6 +118,7 @@ class PoolingServing:
)
pooling_params
=
self
.
io_processor
.
create_pooling_params
(
ctx
.
request
)
pooling_params
.
verify
(
self
.
model_config
)
for
i
,
engine_prompt
in
enumerate
(
ctx
.
engine_prompts
):
prompt_request_id
=
(
...
...
vllm/entrypoints/utils.py
View file @
f262a62a
...
...
@@ -309,6 +309,9 @@ def create_error_response(
if
isinstance
(
message
,
Exception
):
exc
=
message
logger
.
debug
(
"create_error_response called with %s: %s"
,
type
(
exc
).
__name__
,
exc
)
from
vllm.exceptions
import
VLLMNotFoundError
,
VLLMValidationError
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment