Unverified commit 8cb47d04, authored by MatejKosec, committed by GitHub
Browse files

feat: responses API compliance with upstream type alignment (#6089)


Signed-off-by: Matej Kosec <mkosec@nvidia.com>
Co-authored-by: Ishan Dhanani <ishandhanani@gmail.com>
parent f8d0a9f9
This diff is collapsed.
This diff is collapsed.
......@@ -156,6 +156,7 @@ def run_serve_deployment(
payload=payload.body,
timeout=payload.timeout,
method=payload.method,
stream=payload.http_stream,
)
server_process.check_response(payload, response)
......
......@@ -23,6 +23,8 @@ from tests.utils.payload_builder import (
embedding_payload,
embedding_payload_default,
metric_payload_default,
responses_payload_default,
responses_stream_payload_default,
)
logger = logging.getLogger(__name__)
......@@ -60,6 +62,8 @@ sglang_configs = {
request_payloads=[
chat_payload_default(),
completion_payload_default(),
responses_payload_default(),
responses_stream_payload_default(),
metric_payload_default(min_num_requests=6, backend="sglang"),
],
),
......
......@@ -52,6 +52,7 @@ def send_request(
timeout: float = 30.0,
method: str = "POST",
log_level: int = 20,
stream: bool = False,
) -> requests.Response:
"""
Send an HTTP request to the engine with detailed logging.
......@@ -97,7 +98,7 @@ def send_request(
if method_upper == "GET":
response = requests.get(url, params=payload, timeout=timeout)
elif method_upper == "POST":
response = requests.post(url, json=payload, timeout=timeout)
response = requests.post(url, json=payload, timeout=timeout, stream=stream)
else:
# Fallback for other methods if needed
response = requests.request(
......@@ -117,8 +118,14 @@ def send_request(
logger.debug("Response headers: %s", dict(response.headers))
# Try to log response body (truncated if too long)
# Skip body logging for streaming responses to avoid consuming the stream
if stream:
logger.debug("Response body: <streaming, not logged>")
else:
try:
if response.headers.get("content-type", "").startswith("application/json"):
if response.headers.get("content-type", "").startswith(
"application/json"
):
response_data = response.json()
response_str = json.dumps(response_data, indent=2)
if len(response_str) > 1000:
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment