Unverified commit 9a4521dd, authored by cmathw and committed by GitHub
Browse files

Support single token decode for `CodeGenTokenizer` (#28628)

convert token id to list in .decode()
parent 5b5e71dc
...@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Union ...@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Union
import numpy as np import numpy as np
import regex as re import regex as re
from ...utils import is_tf_available, is_torch_available, logging from ...utils import is_tf_available, is_torch_available, logging, to_py_obj
if TYPE_CHECKING: if TYPE_CHECKING:
...@@ -352,6 +352,9 @@ class CodeGenTokenizer(PreTrainedTokenizer): ...@@ -352,6 +352,9 @@ class CodeGenTokenizer(PreTrainedTokenizer):
Returns: Returns:
`str`: The decoded sentence. `str`: The decoded sentence.
""" """
token_ids = to_py_obj(token_ids)
decoded_text = super()._decode( decoded_text = super()._decode(
token_ids=token_ids, token_ids=token_ids,
skip_special_tokens=skip_special_tokens, skip_special_tokens=skip_special_tokens,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment