Unverified commit 7915a259, authored by Pavel Iakubovskii and committed by GitHub
Browse files

Fix donut token2json multiline (#30300)

* Fix multiline processing

* Update test for token2json
parent b65df514
......@@ -149,7 +149,9 @@ class DonutProcessor(ProcessorMixin):
end_token = end_token.group()
start_token_escaped = re.escape(start_token)
end_token_escaped = re.escape(end_token)
content = re.search(f"{start_token_escaped}(.*?){end_token_escaped}", tokens, re.IGNORECASE)
content = re.search(
f"{start_token_escaped}(.*?){end_token_escaped}", tokens, re.IGNORECASE | re.DOTALL
)
if content is not None:
content = content.group(1).strip()
if r"<s_" in content and r"</s_" in content: # non-leaf node
......
......@@ -35,6 +35,8 @@ class DonutProcessorTest(unittest.TestCase):
"zip": "30301",
"phone": "123-4567",
"nicknames": [{"nickname": "Johnny"}, {"nickname": "JD"}],
"multiline": "text\nwith\nnewlines",
"empty": "",
}
sequence = (
......@@ -42,6 +44,8 @@ class DonutProcessorTest(unittest.TestCase):
"<s_state>GA</s_state><s_zip>30301</s_zip><s_phone>123-4567</s_phone>"
"<s_nicknames><s_nickname>Johnny</s_nickname>"
"<sep/><s_nickname>JD</s_nickname></s_nicknames>"
"<s_multiline>text\nwith\nnewlines</s_multiline>"
"<s_empty></s_empty>"
)
actual_json = self.processor.token2json(sequence)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment