Unverified commit 8cb47d04, authored by MatejKosec and committed by GitHub
Browse files

feat: responses API compliance with upstream type alignment (#6089)


Signed-off-by: Matej Kosec <mkosec@nvidia.com>
Co-authored-by: Ishan Dhanani <ishandhanani@gmail.com>
parent f8d0a9f9
This diff is collapsed.
This diff is collapsed.
...@@ -156,6 +156,7 @@ def run_serve_deployment( ...@@ -156,6 +156,7 @@ def run_serve_deployment(
payload=payload.body, payload=payload.body,
timeout=payload.timeout, timeout=payload.timeout,
method=payload.method, method=payload.method,
stream=payload.http_stream,
) )
server_process.check_response(payload, response) server_process.check_response(payload, response)
......
...@@ -23,6 +23,8 @@ from tests.utils.payload_builder import ( ...@@ -23,6 +23,8 @@ from tests.utils.payload_builder import (
embedding_payload, embedding_payload,
embedding_payload_default, embedding_payload_default,
metric_payload_default, metric_payload_default,
responses_payload_default,
responses_stream_payload_default,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -60,6 +62,8 @@ sglang_configs = { ...@@ -60,6 +62,8 @@ sglang_configs = {
request_payloads=[ request_payloads=[
chat_payload_default(), chat_payload_default(),
completion_payload_default(), completion_payload_default(),
responses_payload_default(),
responses_stream_payload_default(),
metric_payload_default(min_num_requests=6, backend="sglang"), metric_payload_default(min_num_requests=6, backend="sglang"),
], ],
), ),
......
...@@ -52,6 +52,7 @@ def send_request( ...@@ -52,6 +52,7 @@ def send_request(
timeout: float = 30.0, timeout: float = 30.0,
method: str = "POST", method: str = "POST",
log_level: int = 20, log_level: int = 20,
stream: bool = False,
) -> requests.Response: ) -> requests.Response:
""" """
Send an HTTP request to the engine with detailed logging. Send an HTTP request to the engine with detailed logging.
...@@ -97,7 +98,7 @@ def send_request( ...@@ -97,7 +98,7 @@ def send_request(
if method_upper == "GET": if method_upper == "GET":
response = requests.get(url, params=payload, timeout=timeout) response = requests.get(url, params=payload, timeout=timeout)
elif method_upper == "POST": elif method_upper == "POST":
response = requests.post(url, json=payload, timeout=timeout) response = requests.post(url, json=payload, timeout=timeout, stream=stream)
else: else:
# Fallback for other methods if needed # Fallback for other methods if needed
response = requests.request( response = requests.request(
...@@ -117,8 +118,14 @@ def send_request( ...@@ -117,8 +118,14 @@ def send_request(
logger.debug("Response headers: %s", dict(response.headers)) logger.debug("Response headers: %s", dict(response.headers))
# Try to log response body (truncated if too long) # Try to log response body (truncated if too long)
# Skip body logging for streaming responses to avoid consuming the stream
if stream:
logger.debug("Response body: <streaming, not logged>")
else:
try: try:
if response.headers.get("content-type", "").startswith("application/json"): if response.headers.get("content-type", "").startswith(
"application/json"
):
response_data = response.json() response_data = response.json()
response_str = json.dumps(response_data, indent=2) response_str = json.dumps(response_data, indent=2)
if len(response_str) > 1000: if len(response_str) > 1000:
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment