"""Send a chat completion request to the frontend"""
payload={
"model":FAULT_TOLERANCE_MODEL_NAME,
"messages":[{"role":"user","content":prompt}],
"max_tokens":max_tokens,
"stream":stream,
}
headers={"Content-Type":"application/json"}
logger.info(
f"Sending chat completion request (stream={stream}) with prompt: '{prompt[:50]}...' and max_tokens: {max_tokens}"
)
session=requests.Session()
try:
response=session.post(
"http://localhost:8000/v1/chat/completions",
headers=headers,
json=payload,
timeout=timeout,
stream=stream,
)
logger.info(f"Received response with status code: {response.status_code}")
returnresponse
exceptrequests.exceptions.Timeout:
logger.error(f"Request timed out after {timeout} seconds")
raise
exceptrequests.exceptions.RequestExceptionase:
logger.error(f"Request failed with error: {e}")
raise
defsend_request_and_cancel(
request_type:str="completion",
timeout:int|float=1,
use_long_prompt:bool=False,
):
"""Send a request with short timeout to trigger cancellation"""
logger.info(f"Sending {request_type} request to be cancelled...")
prompt="Tell me a very long and detailed story about the history of artificial intelligence, including all major milestones, researchers, and breakthroughs?"
logger.info(f"Received response with status code: {response.status_code}")
returnresponse
exceptrequests.exceptions.Timeout:
logger.error(f"Request timed out after {timeout} seconds")
raise
exceptrequests.exceptions.RequestExceptionase:
logger.error(f"Request failed with error: {e}")
raise
defsend_request_and_cancel(
request_type:str="completion",
timeout:int|float=1,
use_long_prompt:bool=False,
):
"""Send a request with short timeout to trigger cancellation"""
logger.info(f"Sending {request_type} request to be cancelled...")
prompt="Tell me a very long and detailed story about the history of artificial intelligence, including all major milestones, researchers, and breakthroughs?"