Unverified commit bf270c28, authored by dagil-nvidia and committed by GitHub
Browse files

refactor: optimize regex patterns in docs scripts (#5777)

parent 59d20d1e
......@@ -53,7 +53,8 @@ class GitHubAlertsTransformer:
def __init__(self):
# Regex to match GitHub alert syntax in text
# Previous pattern (before this change): lazy `.*?` for the alert type, which
# could stretch across a `]` when that was the only way to reach end of line.
self.alert_pattern = re.compile(r"^\[!(.*?)\](?:\s+(.*))?$")
# Uses [^\]]* instead of .*? to prevent backtracking on ] characters
# Current pattern: this rebinding supersedes the assignment above.
#   group 1: the text between `[!` and the first `]` (cannot contain `]`)
#   group 2: optional; the rest of the line after the whitespace following `]`
self.alert_pattern = re.compile(r"^\[!([^\]]*)\](?:\s+(.*))?$")
def is_github_alert_blockquote(self, node: nodes.block_quote) -> bool:
"""
......
......@@ -39,7 +39,8 @@ dynamo_github_url_reg = re.compile(
)
# relpath_patn = r"]\s*\(\s*([^)]+)\)"
# Hyperlink in a .md file, excluding embedded images.
# Previous pattern (before this change): lazy target group followed by
# `\s*\)`, so whitespace before the closing paren was captured in group 3.
hyperlink_reg = re.compile(r"((?<!\!)\[[^\]]+\]\s*\(\s*)([^)]+?)(\s*\))")
# Uses greedy [^)]+ instead of lazy [^)]+? to avoid backtracking with \s*
# Current pattern: this rebinding supersedes the assignment above.
#   group 1: `[text]`, optional whitespace, `(` and any leading whitespace;
#            the (?<!\!) lookbehind rejects `![...]` image syntax
#   group 2: the link target, greedy up to `)`; it may now end with
#            whitespace, so consumers of group 2 need to strip it
#   group 3: the closing `)` only
hyperlink_reg = re.compile(r"((?<!\!)\[[^\]]+\]\s*\(\s*)([^)]+)(\))")
exclusions = None
with open(f"{dynamo_docs_abspath}/exclusions.txt", "r") as f:
......@@ -225,7 +226,8 @@ def replace_hyperlink(m, src_doc_path):
should be safe for now.
"""
hyperlink_str = m.group(2)
# Strip trailing whitespace since the greedy regex includes it
hyperlink_str = m.group(2).rstrip()
match = http_reg.match(hyperlink_str)
if match:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment