"examples/tensorflow/language-modeling/run_mlm.py" did not exist on "04dbea31a9b2e1e0bbb44ea6c2e0074d90cf0ba9"
Unverified commit 0cbddfb1, authored by Lysandre Debut, committed by GitHub
Browse files

Replace double occurrences as the last step (#11367)

parent 73fde1de
......@@ -453,7 +453,6 @@ class AlbertConverter(SpmConverter):
list_normalizers = [
normalizers.Replace("``", '"'),
normalizers.Replace("''", '"'),
normalizers.Replace(Regex(" {2,}"), " "),
]
if not self.original_tokenizer.keep_accents:
list_normalizers.append(normalizers.NFKD())
......@@ -463,6 +462,7 @@ class AlbertConverter(SpmConverter):
precompiled_charsmap = proto.normalizer_spec.precompiled_charsmap
list_normalizers.append(normalizers.Precompiled(precompiled_charsmap))
list_normalizers.append(normalizers.Replace(Regex(" {2,}"), " "))
return normalizers.Sequence(list_normalizers)
def post_processor(self):
......@@ -641,7 +641,6 @@ class XLNetConverter(SpmConverter):
list_normalizers = [
normalizers.Replace("``", '"'),
normalizers.Replace("''", '"'),
normalizers.Replace(Regex(" {2,}"), " "),
]
if not self.original_tokenizer.keep_accents:
list_normalizers.append(normalizers.NFKD())
......@@ -651,6 +650,7 @@ class XLNetConverter(SpmConverter):
precompiled_charsmap = proto.normalizer_spec.precompiled_charsmap
list_normalizers.append(normalizers.Precompiled(precompiled_charsmap))
list_normalizers.append(normalizers.Replace(Regex(" {2,}"), " "))
return normalizers.Sequence(list_normalizers)
def post_processor(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment