Unverified commit 48911230, authored by Dmitry Tokarev, committed by GitHub
Browse files

test(fault_tolerance): replace hand-rolled SSE parser with openai SDK (#8536)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent a2a2753d
...@@ -90,3 +90,4 @@ repos: ...@@ -90,3 +90,4 @@ repos:
- filelock - filelock
- pyyaml - pyyaml
- prometheus_client>=0.23.1 - prometheus_client>=0.23.1
- openai
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import json
import logging import logging
import re import re
import threading import threading
...@@ -9,6 +8,7 @@ import time ...@@ -9,6 +8,7 @@ import time
import pytest import pytest
import requests import requests
from openai import APIError, OpenAI
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
from tests.utils.managed_process import ( from tests.utils.managed_process import (
...@@ -46,34 +46,18 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess): ...@@ -46,34 +46,18 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess):
) )
def _parse_completion_sse_content(line: str) -> str | Exception | None: def _make_client(frontend_port: int) -> OpenAI:
""" """Build an OpenAI client pointed at the test frontend.
Parse an SSE line from the completions API and extract the text content.
Args:
line: Raw SSE line string
Returns: max_retries=0 so fault-tolerance tests see the first error instead of
str: The text content if found silent retries; api_key is a placeholder since the frontend doesn't auth.
Exception: If error event or parse error
None: If no content (e.g., [DONE] or empty)
""" """
if line.startswith("event: error"): return OpenAI(
return Exception(f"SSE error event received: {line}") base_url=f"http://localhost:{frontend_port}/v1",
api_key="not-needed",
if not line.startswith("data: "): max_retries=0,
return None # Skip non-data lines timeout=240,
)
data_str = line[6:] # Remove "data: " prefix
if data_str == "[DONE]":
return None
try:
chunk = json.loads(data_str)
text = chunk["choices"][0].get("text")
return text # May be None if no text content
except Exception as e:
return Exception(f"Error parsing response chunk: {e}")
def start_completion_request( def start_completion_request(
...@@ -102,14 +86,6 @@ def start_completion_request( ...@@ -102,14 +86,6 @@ def start_completion_request(
prompt = "Tell me a long long long story about yourself?" prompt = "Tell me a long long long story about yourself?"
if use_long_prompt: if use_long_prompt:
prompt += " Make sure it is" + " long" * 8000 + "!" prompt += " Make sure it is" + " long" * 8000 + "!"
timeout = 240 # Extended timeout for long request
payload = {
"model": FAULT_TOLERANCE_MODEL_NAME,
"prompt": prompt,
"stream": stream,
}
headers = {"Content-Type": "application/json"}
logger.info( logger.info(
f"Sending completion request (stream={stream}) with prompt: '{prompt[:50]}...'" f"Sending completion request (stream={stream}) with prompt: '{prompt[:50]}...'"
...@@ -118,45 +94,30 @@ def start_completion_request( ...@@ -118,45 +94,30 @@ def start_completion_request(
response_list.append((None, time.time())) # start timestamp response_list.append((None, time.time())) # start timestamp
try: try:
with requests.post( client = _make_client(frontend_port)
f"http://localhost:{frontend_port}/v1/completions",
headers=headers,
json=payload,
timeout=timeout,
stream=stream,
) as response:
logger.info(
f"Received response with status code: {response.status_code}"
)
if response.status_code != 200:
response_list.append(
(
Exception(
f"Request failed with status {response.status_code}: {response.text}"
),
time.time(),
)
)
return
if stream: if stream:
for line in response.iter_lines(): for chunk in client.completions.create(
if line: model=FAULT_TOLERANCE_MODEL_NAME,
content = _parse_completion_sse_content( prompt=prompt,
line.decode("utf-8") stream=True,
) ):
if content is not None: text = chunk.choices[0].text if chunk.choices else None
response_list.append((content, time.time())) # Match the original hand-rolled parser: keep empty strings,
# drop only None. Empty chunks (e.g. the first stream frame)
# still count as a response arrival for delay measurement.
if text is not None:
response_list.append((text, time.time()))
else: else:
try: resp = client.completions.create(
content = response.json()["choices"][0]["text"] model=FAULT_TOLERANCE_MODEL_NAME,
response_list.append((content, time.time())) prompt=prompt,
except Exception as e: stream=False,
response_list.append(
(Exception(f"Error parsing response: {e}"), time.time())
) )
response_list.append((resp.choices[0].text, time.time()))
except Exception as e: except Exception as e:
# openai.APIError subclasses cover HTTP non-200, mid-stream
# structured `data: {"error": {...}}` frames, connection failures,
# and timeouts. Non-openai exceptions (network, etc.) also bubble.
logger.error(f"Request failed with error: {e}") logger.error(f"Request failed with error: {e}")
response_list.append((e, time.time())) response_list.append((e, time.time()))
...@@ -166,36 +127,6 @@ def start_completion_request( ...@@ -166,36 +127,6 @@ def start_completion_request(
return request_thread, response_list return request_thread, response_list
def _parse_chat_completion_sse_content(line: str) -> str | Exception | None:
"""
Parse an SSE line and extract the content.
Args:
line: Raw SSE line string
Returns:
str: The content delta if found
Exception: If error event or parse error
None: If no content (e.g., [DONE] or empty delta)
"""
if line.startswith("event: error"):
return Exception(f"SSE error event received: {line}")
if not line.startswith("data: "):
return None # Skip non-data lines
data_str = line[6:] # Remove "data: " prefix
if data_str == "[DONE]":
return None
try:
chunk = json.loads(data_str)
content = chunk["choices"][0]["delta"].get("content")
return content # May be None if delta has no content
except Exception as e:
return Exception(f"Error parsing response chunk: {e}")
def start_chat_completion_request( def start_chat_completion_request(
frontend_port: int, stream: bool, use_long_prompt: bool = False frontend_port: int, stream: bool, use_long_prompt: bool = False
) -> tuple: ) -> tuple:
...@@ -222,14 +153,6 @@ def start_chat_completion_request( ...@@ -222,14 +153,6 @@ def start_chat_completion_request(
prompt = "Tell me a long long long story about yourself?" prompt = "Tell me a long long long story about yourself?"
if use_long_prompt: if use_long_prompt:
prompt += " Make sure it is" + " long" * 8000 + "!" prompt += " Make sure it is" + " long" * 8000 + "!"
timeout = 240 # Extended timeout for long request
payload = {
"model": FAULT_TOLERANCE_MODEL_NAME,
"messages": [{"role": "user", "content": prompt}],
"stream": stream,
}
headers = {"Content-Type": "application/json"}
logger.info( logger.info(
f"Sending chat completion request (stream={stream}) with prompt: '{prompt[:50]}...'" f"Sending chat completion request (stream={stream}) with prompt: '{prompt[:50]}...'"
...@@ -238,45 +161,31 @@ def start_chat_completion_request( ...@@ -238,45 +161,31 @@ def start_chat_completion_request(
response_list.append((None, time.time())) # start timestamp response_list.append((None, time.time())) # start timestamp
try: try:
with requests.post( client = _make_client(frontend_port)
f"http://localhost:{frontend_port}/v1/chat/completions",
headers=headers,
json=payload,
timeout=timeout,
stream=stream,
) as response:
logger.info(
f"Received response with status code: {response.status_code}"
)
if response.status_code != 200:
response_list.append(
(
Exception(
f"Request failed with status {response.status_code}: {response.text}"
),
time.time(),
)
)
return
if stream: if stream:
for line in response.iter_lines(): for chunk in client.chat.completions.create(
if line: model=FAULT_TOLERANCE_MODEL_NAME,
content = _parse_chat_completion_sse_content( messages=[{"role": "user", "content": prompt}],
line.decode("utf-8") stream=True,
) ):
content = chunk.choices[0].delta.content if chunk.choices else None
# Match the original hand-rolled parser: keep empty strings,
# drop only None. Empty chunks (e.g. the first `role`-only
# stream frame) still count as a response arrival for delay
# measurement.
if content is not None: if content is not None:
response_list.append((content, time.time())) response_list.append((content, time.time()))
else: else:
try: resp = client.chat.completions.create(
content = response.json()["choices"][0]["message"]["content"] model=FAULT_TOLERANCE_MODEL_NAME,
response_list.append((content, time.time())) messages=[{"role": "user", "content": prompt}],
except Exception as e: stream=False,
response_list.append(
(Exception(f"Error parsing response: {e}"), time.time())
) )
response_list.append((resp.choices[0].message.content, time.time()))
except Exception as e: except Exception as e:
# openai.APIError subclasses cover HTTP non-200, mid-stream
# structured `data: {"error": {...}}` frames, connection failures,
# and timeouts. Non-openai exceptions also bubble for visibility.
logger.error(f"Request failed with error: {e}") logger.error(f"Request failed with error: {e}")
response_list.append((e, time.time())) response_list.append((e, time.time()))
...@@ -634,12 +543,12 @@ def run_migration_test( ...@@ -634,12 +543,12 @@ def run_migration_test(
pytest.fail( pytest.fail(
"Request succeeded unexpectedly when migration should have failed" "Request succeeded unexpectedly when migration should have failed"
) )
except Exception as e: except APIError as e:
error_str = str(e) # Expected: openai.APIError covers mid-stream structured error
assert ( # frames (DIS-1768 contract) and HTTP non-200 responses. A typed
"SSE error event received:" in error_str # check is more robust than matching the exception's stringified
or "Request failed with status" in error_str # message against a specific wire-format prefix.
), f"Unexpected error: {e}" logger.info(f"Got expected APIError: {e}")
try: try:
verify_migration_occurred(frontend) verify_migration_occurred(frontend)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment