Unverified Commit e97deca9 authored by nour-elkamel's avatar nour-elkamel Committed by GitHub
Browse files

fix: escape special characters in the start_token key before search...

fix: escape special characters in the start_token key before searching for end_token in the token2json function of DonutProcessor (#25472)

fix: escape special characters in the start_token key before searching for end_token
parent 0ebe7ae1
......@@ -138,7 +138,9 @@ class DonutProcessor(ProcessorMixin):
if start_token is None:
break
key = start_token.group(1)
end_token = re.search(rf"</s_{key}>", tokens, re.IGNORECASE)
key_escaped = re.escape(key)
end_token = re.search(rf"</s_{key_escaped}>", tokens, re.IGNORECASE)
start_token = start_token.group()
if end_token is None:
tokens = tokens.replace(start_token, "")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment