Unverified commit d5080aea, authored by Flora Feng, committed by GitHub
Browse files

[Refactor] Remove deadcode in Responses API serving (#36726)


Signed-off-by: sfeng33 <4florafeng@gmail.com>
Co-authored-by: Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent f22d6e02
...@@ -1102,7 +1102,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -1102,7 +1102,6 @@ class OpenAIServingResponses(OpenAIServing):
event_deque: deque[StreamingResponsesResponse] = deque() event_deque: deque[StreamingResponsesResponse] = deque()
new_event_signal = asyncio.Event() new_event_signal = asyncio.Event()
self.event_store[request.request_id] = (event_deque, new_event_signal) self.event_store[request.request_id] = (event_deque, new_event_signal)
response = None
generator = self.responses_stream_generator(request, *args, **kwargs) generator = self.responses_stream_generator(request, *args, **kwargs)
try: try:
async for event in generator: async for event in generator:
...@@ -1111,15 +1110,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -1111,15 +1110,6 @@ class OpenAIServingResponses(OpenAIServing):
finally: finally:
new_event_signal.set() new_event_signal.set()
if response is not None and isinstance(response, ErrorResponse):
# If the request has failed, update the status to "failed".
response_id = request.request_id
async with self.response_store_lock:
stored_response = self.response_store.get(response_id)
assert stored_response is not None
if stored_response.status not in ("completed", "cancelled"):
stored_response.status = "failed"
async def _run_background_request( async def _run_background_request(
self, self,
request: ResponsesRequest, request: ResponsesRequest,
...@@ -1226,19 +1216,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -1226,19 +1216,6 @@ class OpenAIServingResponses(OpenAIServing):
param="response_id", param="response_id",
) )
def _make_store_not_supported_error(self) -> ErrorResponse:
    """Build the error response reporting that `store=True` is not supported.

    The message tells the client to either pass `store=False` in the
    Responses API request or start the vLLM server with
    `VLLM_ENABLE_RESPONSES_API_STORE=1` set in the environment.

    Returns:
        Whatever `self.create_error_response` produces for an
        `invalid_request_error` with HTTP status 400 (BAD_REQUEST),
        attributed to the `store` parameter.
    """
    # Assemble the user-facing guidance first so the call below stays short.
    guidance = (
        "`store=True` (default) is not supported. Please set "
        "`store=False` in Responses API or set "
        "`VLLM_ENABLE_RESPONSES_API_STORE=1` in the env var when "
        "starting the vLLM server."
    )
    return self.create_error_response(
        message=guidance,
        err_type="invalid_request_error",
        param="store",
        status_code=HTTPStatus.BAD_REQUEST,
    )
async def _process_simple_streaming_events( async def _process_simple_streaming_events(
self, self,
request: ResponsesRequest, request: ResponsesRequest,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment