Unverified commit beac8dd4, authored by Ricardo Lu, committed by GitHub

fix: don't skip first special token. (#1497)

parent 28b47d1e
@@ -120,6 +120,10 @@ def detokenize_incrementally(
         # tokenizers (bigger = more conservative).
         # Subtract 1 extra to account for the generated token.
         prefix_offset = max(len(output_tokens) - 6, 0)
-        read_offset = max(len(output_tokens) - 1, 0)
+        # If the first new token is a special token, we can't skip 1 extra token
+        if skip_special_tokens and new_token_id in tokenizer.all_special_ids:
+            read_offset = max(len(output_tokens), 0)
+        else:
+            read_offset = max(len(output_tokens) - 1, 0)
     else:
         # Put new_token_id in a list so skip_special_tokens is respected
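For context, a minimal sketch of the off-by-one this patch handles (assuming a HuggingFace-style tokenizer; the variable names mirror the diff, and `gpt2` is only an illustrative model choice): when `skip_special_tokens=True`, a special new token is dropped from `output_tokens`, so its last element is a token whose text was already read, and initializing `read_offset` to `len(output_tokens) - 1` would re-emit that token.

```python
from transformers import AutoTokenizer

# Hypothetical setup: "gpt2" stands in for whatever model is being served.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Prompt tokens plus one freshly generated token that happens to be special.
all_input_ids = tokenizer.encode("Hello world") + [tokenizer.eos_token_id]
new_token_id = all_input_ids[-1]

skip_special_tokens = True
output_tokens = tokenizer.convert_ids_to_tokens(
    all_input_ids, skip_special_tokens=skip_special_tokens)
# The special token was filtered out, so output_tokens ends with the last
# prompt token, whose text has already been returned to the caller.

if skip_special_tokens and new_token_id in tokenizer.all_special_ids:
    # Nothing new is left to read; point read_offset past the end.
    read_offset = max(len(output_tokens), 0)
else:
    # The last element is the new token; leave it to be read next.
    read_offset = max(len(output_tokens) - 1, 0)
```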