"tests/test_modeling_deberta.py" did not exist on "75d5f98fd2a154bb5bfc0879c4a6e389c6789be5"
Commit 90debb9f authored by Dirk Groeneveld, committed by Lysandre Debut
Browse files

Keep even the first of the special tokens intact while lowercasing.

parent b98ff885
......@@ -642,7 +642,7 @@ class PreTrainedTokenizer(object):
def lowercase_text(t):
# convert non-special tokens to lowercase
escaped_special_toks = [re.escape(s_tok) for s_tok in all_special_tokens]
-pattern = r'(^' + r'|'.join(escaped_special_toks) + r')|' + \
+pattern = r'(' + r'|'.join(escaped_special_toks) + r')|' + \
r'(.+?)'
return re.sub(
pattern,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment