Unverified Commit f262a62a, authored by Andreas Karatzas, committed by GitHub
Browse files

[ROCm][CI] Fix flaky Cohere/OpenAI embedding parity test (#37616)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent 9ac2fcaf
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
import pytest import pytest
import requests import requests
from tests.utils import RemoteOpenAIServer from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
MODEL_NAME = "BAAI/bge-base-en-v1.5" MODEL_NAME = "BAAI/bge-base-en-v1.5"
DTYPE = "bfloat16" DTYPE = "bfloat16"
...@@ -28,7 +28,7 @@ def server(): ...@@ -28,7 +28,7 @@ def server():
"512", "512",
"--gpu-memory-utilization", "--gpu-memory-utilization",
"0.02", "0.02",
] ] + ROCM_EXTRA_ARGS
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server yield remote_server
......
...@@ -10,7 +10,7 @@ import pytest ...@@ -10,7 +10,7 @@ import pytest
from tests.conftest import HfRunner from tests.conftest import HfRunner
from tests.models.language.pooling.embed_utils import run_embedding_correctness_test from tests.models.language.pooling.embed_utils import run_embedding_correctness_test
from tests.models.utils import EmbedModelInfo from tests.models.utils import EmbedModelInfo
from tests.utils import RemoteOpenAIServer from tests.utils import ROCM_EXTRA_ARGS, RemoteOpenAIServer
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
from vllm.platforms import current_platform from vllm.platforms import current_platform
...@@ -49,7 +49,7 @@ def server(model_info, dtype: str): ...@@ -49,7 +49,7 @@ def server(model_info, dtype: str):
"--enforce-eager", "--enforce-eager",
"--max-model-len", "--max-model-len",
"512", "512",
] ] + ROCM_EXTRA_ARGS
if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5": if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5":
# Manually enable Matryoshka Embeddings # Manually enable Matryoshka Embeddings
......
...@@ -118,6 +118,7 @@ class PoolingServing: ...@@ -118,6 +118,7 @@ class PoolingServing:
) )
pooling_params = self.io_processor.create_pooling_params(ctx.request) pooling_params = self.io_processor.create_pooling_params(ctx.request)
pooling_params.verify(self.model_config)
for i, engine_prompt in enumerate(ctx.engine_prompts): for i, engine_prompt in enumerate(ctx.engine_prompts):
prompt_request_id = ( prompt_request_id = (
......
...@@ -309,6 +309,9 @@ def create_error_response( ...@@ -309,6 +309,9 @@ def create_error_response(
if isinstance(message, Exception): if isinstance(message, Exception):
exc = message exc = message
logger.debug(
"create_error_response called with %s: %s", type(exc).__name__, exc
)
from vllm.exceptions import VLLMNotFoundError, VLLMValidationError from vllm.exceptions import VLLMNotFoundError, VLLMValidationError
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment