Fix core dump when the session length is exceeded in chat and generate (#366)

ce21a318 · AllentDan · GitHub · 71ade772 · ce21a318 · ce21a318
Unverified commit ce21a318, authored Sep 07, 2023 by AllentDan; committed by GitHub on Sep 07, 2023.
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 4 deletions

lmdeploy/serve/async_engine.py +1 -1

lmdeploy/turbomind/chat.py +3 -3

No files found.
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -112,7 +112,7 @@ class AsyncEngine:
        prompt = self.model.messages2prompt(messages, sequence_start)
        input_ids = self.tokenizer.encode(prompt)
        finish_reason = 'stop' if stop else None
-        if not sequence_end and self.steps[str(session_id)] + len(
+        if self.steps[str(session_id)] + len(
                input_ids) >= self.tm_model.session_len:
            finish_reason = 'length'
            yield GenOut('', self.steps[str(session_id)], len(input_ids), 0,

--- a/lmdeploy/turbomind/chat.py
+++ b/lmdeploy/turbomind/chat.py
@@ -74,12 +74,12 @@ def main(model_path,
            seed = random.getrandbits(64)
        else:
            print(f'session {session_id}')
-            if step >= tm_model.session_len:
+            prompt = model.get_prompt(prompt, nth_round == 1)
+            input_ids = tokenizer.encode(prompt)
+            if step + len(input_ids) >= tm_model.session_len:
                print('WARNING: exceed session max length.'
                      ' Please end the session.')
                continue
-            prompt = model.get_prompt(prompt, nth_round == 1)
-            input_ids = tokenizer.encode(prompt)
            print(f'{prompt} ', end='', flush=True)
            response_size = 0
            for outputs in generator.stream_infer(