Commit c6dfc3cd (unverified)
Fix handling of special tokens in decoding. (#418)
Authored by xcnick on Jul 12, 2023; committed via GitHub on Jul 12, 2023.
Parent: 51be3651
Showing 2 changed files with 6 additions and 3 deletions:

  vllm/engine/llm_engine.py             +3 -2
  vllm/transformers_utils/tokenizer.py  +3 -1
vllm/engine/llm_engine.py

@@ -276,8 +276,9 @@ class LLMEngine:
                     seq.get_last_token_id(),
                     skip_special_tokens=True,
                 )
-                seq.output_tokens.append(new_token)
-                seq.output_text = new_output_text
+                if new_token is not None:
+                    seq.output_tokens.append(new_token)
+                    seq.output_text = new_output_text
 
     def _stop_sequences(self, seq_groups: List[SequenceGroup]) -> None:
         """Stop the finished sequences."""
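Why the guard is needed: with this commit, detokenize_incrementally returns None as the new token when a skipped special token is encountered (see the tokenizer.py change below). A minimal standalone sketch, with made-up token strings rather than real vLLM state, of what goes wrong if a caller appends that None unconditionally:

# Minimal sketch (illustrative values, not vLLM code): appending a
# None "token" poisons the token list for any later string handling.
output_tokens = ["Hello", ",", "world"]
output_tokens.append(None)   # old behavior when the new token was skipped

try:
    "".join(output_tokens)   # later detokenization expects only str entries
except TypeError as err:
    print(err)  # sequence item 3: expected str instance, NoneType found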
vllm/transformers_utils/tokenizer.py

@@ -80,6 +80,8 @@ def detokenize_incrementally(
         new_token: The new token as a string.
         output_text: The new output text as a string.
     """
+    if skip_special_tokens and (new_token_id in tokenizer.all_special_ids):
+        return None, prev_output_tokens
     new_token = tokenizer.convert_ids_to_tokens(
         new_token_id, skip_special_tokens=skip_special_tokens)
     output_tokens = prev_output_tokens + [new_token]
@@ -99,7 +101,7 @@ def detokenize_incrementally(
     sub_texts = []
     current_sub_text = []
     for token in output_tokens:
-        if skip_special_tokens and token in tokenizer.all_special_ids:
+        if skip_special_tokens and token in tokenizer.all_special_tokens:
             continue
         if token in tokenizer.added_tokens_encoder:
             if current_sub_text:
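The switch from all_special_ids to all_special_tokens in the second hunk is the heart of the fix: output_tokens holds token strings, while all_special_ids holds integer ids, so the old membership check could never match. A minimal sketch with a Hugging Face tokenizer (assumes the transformers package; "gpt2" is just an example checkpoint):

# Minimal sketch: shows why a *string* token must be checked against
# all_special_tokens rather than all_special_ids.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")

print(tokenizer.all_special_ids)     # [50256]            -- integer ids
print(tokenizer.all_special_tokens)  # ['<|endoftext|>']  -- strings

token = tokenizer.convert_ids_to_tokens(50256)  # '<|endoftext|>'
print(token in tokenizer.all_special_ids)       # False: str vs int
print(token in tokenizer.all_special_tokens)    # True: the check matches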