"vscode:/vscode.git/clone" did not exist on "3b23d57c960c77edbc31f9bcae9dcb69a491fd19"
Unverified commit fdd6f2ad, authored by Reagan Lee and committed by GitHub
Browse files

Convert online APIs to use Renderer (#34084)


Signed-off-by: Reagan Lee <reaganjlee@gmail.com>
Co-authored-by: Reagan Lee <reaganjlee@gmail.com>
parent 33bcd3dc
...@@ -471,15 +471,31 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -471,15 +471,31 @@ class OpenAISpeechToText(OpenAIServing):
lora_request=lora_request, lora_request=lora_request,
) )
list_result_generator = [ trace_headers = (
self.engine_client.generate( None
if raw_request is None
else await self._get_trace_headers(raw_request.headers)
)
list_result_generator = []
for i, prompt in enumerate(prompts):
request_id_item = f"{request_id}_{i}"
engine_request = self.input_processor.process_inputs(
request_id_item,
prompt, prompt,
sampling_params, sampling_params,
f"{request_id}_{i}",
lora_request=lora_request, lora_request=lora_request,
trace_headers=trace_headers,
priority=0,
)
list_result_generator.append(
self.engine_client.generate(
engine_request,
sampling_params,
request_id_item,
lora_request=lora_request,
)
) )
for i, prompt in enumerate(prompts)
]
except ValueError as e: except ValueError as e:
return self.create_error_response(e) return self.create_error_response(e)
......
...@@ -99,8 +99,6 @@ class ServingTokens(OpenAIServing): ...@@ -99,8 +99,6 @@ class ServingTokens(OpenAIServing):
if raw_request: if raw_request:
raw_request.state.request_metadata = request_metadata raw_request.state.request_metadata = request_metadata
# TODO(NickLucche): Change to EngineCoreRequest once Renderer work is
# completed
engine_prompts = await self._preprocess_completion( engine_prompts = await self._preprocess_completion(
request, request,
prompt_input=request.token_ids, prompt_input=request.token_ids,
...@@ -132,16 +130,26 @@ class ServingTokens(OpenAIServing): ...@@ -132,16 +130,26 @@ class ServingTokens(OpenAIServing):
tok_params = request.build_tok_params(self.model_config) tok_params = request.build_tok_params(self.model_config)
tokenization_kwargs = tok_params.get_encode_kwargs() tokenization_kwargs = tok_params.get_encode_kwargs()
result_generator = self.engine_client.generate( engine_request = self.input_processor.process_inputs(
request_id,
engine_prompt, engine_prompt,
sampling_params, sampling_params,
request_id,
lora_request=lora_request, lora_request=lora_request,
tokenization_kwargs=tokenization_kwargs, tokenization_kwargs=tokenization_kwargs,
trace_headers=trace_headers, trace_headers=trace_headers,
priority=request.priority, priority=request.priority,
) )
result_generator = self.engine_client.generate(
engine_request,
sampling_params,
request_id,
lora_request=lora_request,
trace_headers=trace_headers,
priority=request.priority,
tokenization_kwargs=tokenization_kwargs,
)
except ValueError as e: except ValueError as e:
return self.create_error_response(str(e)) return self.create_error_response(str(e))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment