Unverified commit 069434b4, authored by Kris Hung and committed by GitHub
Browse files

fix: Fix vllm multimodal tests (#3361)


Signed-off-by: krishung5 <krish@nvidia.com>
parent de6fdf0c
...@@ -229,10 +229,13 @@ class Processor(ProcessMixIn): ...@@ -229,10 +229,13 @@ class Processor(ProcessMixIn):
"content": prompt, "content": prompt,
} }
# Set stream=True - the http frontend will handle aggregation of
# streamed chunks into a single http response, or stream them
# back as SSE responses based on the stream flag in the request.
chat_request = ChatCompletionRequest( chat_request = ChatCompletionRequest(
model=raw_request.model, model=raw_request.model,
messages=[msg], messages=[msg],
stream=raw_request.stream, stream=True,
max_tokens=raw_request.max_tokens, max_tokens=raw_request.max_tokens,
temperature=raw_request.temperature, temperature=raw_request.temperature,
request_id=str(uuid.uuid4()), request_id=str(uuid.uuid4()),
......
...@@ -266,8 +266,10 @@ class VllmPDWorker(VllmBaseWorker): ...@@ -266,8 +266,10 @@ class VllmPDWorker(VllmBaseWorker):
request = vLLMMultimodalRequest.model_validate(request) request = vLLMMultimodalRequest.model_validate(request)
logger.debug(f"Received PD request: {{ id: {request.request_id} }}.") logger.debug(f"Received PD request: {{ id: {request.request_id} }}.")
embeddings, descriptor = None, None if (
request.multimodal_input.image_url is None
and request.multimodal_input.video_url is None
):
# Process embeddings using the connector # Process embeddings using the connector
# Create a descriptor based on the embedding shape. # Create a descriptor based on the embedding shape.
embeddings = torch.empty( embeddings = torch.empty(
...@@ -277,10 +279,6 @@ class VllmPDWorker(VllmBaseWorker): ...@@ -277,10 +279,6 @@ class VllmPDWorker(VllmBaseWorker):
) )
descriptor = connect.Descriptor(embeddings) descriptor = connect.Descriptor(embeddings)
if (
request.multimodal_input.image_url is None
and request.multimodal_input.video_url is None
):
if descriptor is None: if descriptor is None:
raise RuntimeError( raise RuntimeError(
"Descriptor is None in PD worker - cannot process embeddings" "Descriptor is None in PD worker - cannot process embeddings"
......
...@@ -169,6 +169,7 @@ vllm_configs = { ...@@ -169,6 +169,7 @@ vllm_configs = {
], ],
repeat_count=1, repeat_count=1,
expected_response=["rabbit"], expected_response=["rabbit"],
temperature=0.7,
) )
], ],
), ),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment