"ppocr/git@developer.sourcefind.cn:wangsen/paddle_dbnet.git" did not exist on "ce22e2ff16e0c3d02f4ce891a0afddb84c17ef44"
Unverified Commit f6d5046a authored by Funtowicz Morgan's avatar Funtowicz Morgan Committed by GitHub
Browse files

Override get_vocab for fast tokenizer. (#4717)

parent 88762a2f
...@@ -2368,6 +2368,9 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer): ...@@ -2368,6 +2368,9 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
def _convert_id_to_token(self, index: int) -> Optional[str]: def _convert_id_to_token(self, index: int) -> Optional[str]:
return self._tokenizer.id_to_token(int(index)) return self._tokenizer.id_to_token(int(index))
def get_vocab(self):
return self._tokenizer.get_vocab(True)
def convert_tokens_to_string(self, tokens: List[int], skip_special_tokens: bool = False) -> str: def convert_tokens_to_string(self, tokens: List[int], skip_special_tokens: bool = False) -> str:
return self._tokenizer.decode(tokens, skip_special_tokens) return self._tokenizer.decode(tokens, skip_special_tokens)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment