"tests/python/pytorch/sparse/utils.py" did not exist on "0698e91a0e4b40bd4a5a4e59205d098e1bb3d3c9"
Unverified Commit c6dfc3cd authored by xcnick, committed by GitHub

Fix handling of special tokens in decoding. (#418)

parent 51be3651
...
@@ -276,8 +276,9 @@ class LLMEngine:
             seq.get_last_token_id(),
             skip_special_tokens=True,
         )
-        seq.output_tokens.append(new_token)
-        seq.output_text = new_output_text
+        if new_token is not None:
+            seq.output_tokens.append(new_token)
+            seq.output_text = new_output_text
 
     def _stop_sequences(self, seq_groups: List[SequenceGroup]) -> None:
         """Stop the finished sequences."""
...
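The engine-side hunk above now treats a None return from the detokenizer as "nothing to append". A minimal sketch of that calling pattern, assuming a hypothetical detokenize_step helper and a toy Sequence container (neither is the project's actual API):

# Hedged sketch of the engine-side guard above; "detokenize_step" and
# "Sequence" are hypothetical stand-ins, not the project's real API.
from typing import List, Optional, Tuple


class Sequence:
    def __init__(self) -> None:
        self.output_tokens: List[str] = []
        self.output_text: str = ""


def detokenize_step(prev_tokens: List[str],
                    token_id: int) -> Tuple[Optional[str], str]:
    # Pretend id 1 is a special token (e.g. EOS): report "no new token".
    if token_id == 1:
        return None, " ".join(prev_tokens)
    token = f"tok{token_id}"
    return token, " ".join(prev_tokens + [token])


seq = Sequence()
for token_id in [5, 7, 1]:
    new_token, new_output_text = detokenize_step(seq.output_tokens, token_id)
    if new_token is not None:  # the guard added in the hunk above
        seq.output_tokens.append(new_token)
        seq.output_text = new_output_text

print(seq.output_tokens)  # ['tok5', 'tok7']
print(seq.output_text)    # 'tok5 tok7'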
...
@@ -80,6 +80,8 @@ def detokenize_incrementally(
         new_token: The new token as a string.
         output_text: The new output text as a string.
     """
+    if skip_special_tokens and (new_token_id in tokenizer.all_special_ids):
+        return None, prev_output_tokens
     new_token = tokenizer.convert_ids_to_tokens(
         new_token_id, skip_special_tokens=skip_special_tokens)
     output_tokens = prev_output_tokens + [new_token]
...
@@ -99,7 +101,7 @@ def detokenize_incrementally(
     sub_texts = []
     current_sub_text = []
     for token in output_tokens:
-        if skip_special_tokens and token in tokenizer.all_special_ids:
+        if skip_special_tokens and token in tokenizer.all_special_tokens:
             continue
         if token in tokenizer.added_tokens_encoder:
             if current_sub_text:
...
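The two detokenizer hunks make the filtering consistent: the new token id is checked against all_special_ids before conversion (returning None), and once tokens are strings they are compared against all_special_tokens rather than all_special_ids. A minimal, self-contained sketch of that behavior, assuming a ToyTokenizer stand-in that models only the attributes touched here (not the project's code):

# Hedged sketch of the detokenizer fix; ToyTokenizer only models the
# tokenizer attributes used here (all_special_ids, all_special_tokens,
# convert_ids_to_tokens) and is not a real Hugging Face tokenizer.
from typing import List, Optional, Tuple


class ToyTokenizer:
    vocab = {0: "<s>", 1: "</s>", 2: "Hello", 3: "world"}
    all_special_ids = [0, 1]
    all_special_tokens = ["<s>", "</s>"]

    def convert_ids_to_tokens(self, token_id: int,
                              skip_special_tokens: bool = False) -> str:
        return self.vocab[token_id]


def detokenize_incrementally_sketch(
        tokenizer: ToyTokenizer,
        prev_output_tokens: List[str],
        new_token_id: int,
        skip_special_tokens: bool = True) -> Tuple[Optional[str], str]:
    # Check the *id* against all_special_ids before converting, and signal
    # "nothing new" with None so the caller does not append a stale token.
    if skip_special_tokens and new_token_id in tokenizer.all_special_ids:
        return None, " ".join(prev_output_tokens)
    new_token = tokenizer.convert_ids_to_tokens(
        new_token_id, skip_special_tokens=skip_special_tokens)
    output_tokens = prev_output_tokens + [new_token]
    # Once tokens are strings, filter with all_special_tokens (strings),
    # not all_special_ids (ints) -- the bug fixed in the last hunk.
    kept = [t for t in output_tokens
            if not (skip_special_tokens and t in tokenizer.all_special_tokens)]
    return new_token, " ".join(kept)


tokenizer = ToyTokenizer()
tokens: List[str] = []
for token_id in [2, 3, 1]:  # "Hello", "world", then EOS
    new_token, text = detokenize_incrementally_sketch(tokenizer, tokens, token_id)
    if new_token is not None:
        tokens.append(new_token)
print(tokens)  # ['Hello', 'world'] -- the EOS id was skipped
print(text)    # 'Hello world'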