"git@developer.sourcefind.cn:wangsen/paddle_dbnet.git" did not exist on "b1ba0f7a313af50bb222ccc8ad7aba47767b9b8b"
Commit 90debb9f authored by Dirk Groeneveld, committed by Lysandre Debut
Browse files

Keep even the first of the special tokens intact while lowercasing.

parent b98ff885
...@@ -642,7 +642,7 @@ class PreTrainedTokenizer(object): ...@@ -642,7 +642,7 @@ class PreTrainedTokenizer(object):
def lowercase_text(t): def lowercase_text(t):
# convert non-special tokens to lowercase # convert non-special tokens to lowercase
escaped_special_toks = [re.escape(s_tok) for s_tok in all_special_tokens] escaped_special_toks = [re.escape(s_tok) for s_tok in all_special_tokens]
pattern = r'(^' + r'|'.join(escaped_special_toks) + r')|' + \ pattern = r'(' + r'|'.join(escaped_special_toks) + r')|' + \
r'(.+?)' r'(.+?)'
return re.sub( return re.sub(
pattern, pattern,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment