fix `finish_reason` (#816)

16b4b823 · AllentDan · GitHub · a5b67b95 · 16b4b823 · 16b4b823
Unverified commit 16b4b823, authored Dec 12, 2023 by AllentDan; committed by GitHub on Dec 12, 2023.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 9 additions and 4 deletions

lmdeploy/serve/async_engine.py +7 -4

lmdeploy/serve/openai/api_server.py +2 -0

No files found.
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -204,7 +204,7 @@ class AsyncEngine:
        if do_preprocess:
            prompt = self.model.messages2prompt(prompt, sequence_start)
        input_ids = self.tokenizer.encode(prompt, add_bos=sequence_start)
-        finish_reason = 'stop' if stop else None
+        finish_reason = None
        if self.id2step[str(session_id)] + len(
                input_ids) + request_output_len >= self.tm_model.session_len:
            finish_reason = 'length'
@@ -247,11 +247,14 @@ class AsyncEngine:
                                 len(input_ids), tokens, finish_reason)
                    response_size = tokens

+                finish_reason = 'length' \
+                    if tokens >= request_output_len else 'stop'
                # `response_size` might be note updated since
                # ` if response.endswith('�')`
-                if response_size != tokens:
-                    yield GenOut(response, self.id2step[str(session_id)],
-                                 len(input_ids), tokens, finish_reason)
+                if response_size == tokens:
+                    response = ''  # avoid returning the last response twice
+                yield GenOut(response, self.id2step[str(session_id)],
+                             len(input_ids), tokens, finish_reason)
                # update step
                self.id2step[str(session_id)] += len(input_ids) + tokens
                if sequence_end or stop:

--- a/lmdeploy/serve/openai/api_server.py
+++ b/lmdeploy/serve/openai/api_server.py
@@ -179,6 +179,7 @@ async def chat_completions_v1(request: ChatCompletionRequest,
            response_json = create_stream_response_json(
                index=0,
                text=res.response,
+                finish_reason=res.finish_reason,
            )
            yield f'data: {response_json}\n\n'
        yield 'data: [DONE]\n\n'
@@ -329,6 +330,7 @@ async def completions_v1(request: CompletionRequest,
                response_json = create_stream_response_json(
                    index=0,
                    text=res.response,
+                    finish_reason=res.finish_reason,
                )
                yield f'data: {response_json}\n\n'
        yield 'data: [DONE]\n\n'