Unverified commit bf270c28, authored by dagil-nvidia, committed by GitHub
Browse files

refactor: optimize regex patterns in docs scripts (#5777)

parent 59d20d1e
...@@ -53,7 +53,8 @@ class GitHubAlertsTransformer: ...@@ -53,7 +53,8 @@ class GitHubAlertsTransformer:
def __init__(self): def __init__(self):
# Regex to match GitHub alert syntax in text # Regex to match GitHub alert syntax in text
self.alert_pattern = re.compile(r"^\[!(.*?)\](?:\s+(.*))?$") # Uses [^\]]* instead of .*? to prevent backtracking on ] characters
self.alert_pattern = re.compile(r"^\[!([^\]]*)\](?:\s+(.*))?$")
def is_github_alert_blockquote(self, node: nodes.block_quote) -> bool: def is_github_alert_blockquote(self, node: nodes.block_quote) -> bool:
""" """
......
...@@ -39,7 +39,8 @@ dynamo_github_url_reg = re.compile( ...@@ -39,7 +39,8 @@ dynamo_github_url_reg = re.compile(
) )
# relpath_patn = r"]\s*\(\s*([^)]+)\)" # relpath_patn = r"]\s*\(\s*([^)]+)\)"
# Hyperlink in a .md file, excluding embedded images. # Hyperlink in a .md file, excluding embedded images.
hyperlink_reg = re.compile(r"((?<!\!)\[[^\]]+\]\s*\(\s*)([^)]+?)(\s*\))") # Uses greedy [^)]+ instead of lazy [^)]+? to avoid backtracking with \s*
hyperlink_reg = re.compile(r"((?<!\!)\[[^\]]+\]\s*\(\s*)([^)]+)(\))")
exclusions = None exclusions = None
with open(f"{dynamo_docs_abspath}/exclusions.txt", "r") as f: with open(f"{dynamo_docs_abspath}/exclusions.txt", "r") as f:
...@@ -225,7 +226,8 @@ def replace_hyperlink(m, src_doc_path): ...@@ -225,7 +226,8 @@ def replace_hyperlink(m, src_doc_path):
should be safe for now. should be safe for now.
""" """
hyperlink_str = m.group(2) # Strip trailing whitespace since the greedy regex includes it
hyperlink_str = m.group(2).rstrip()
match = http_reg.match(hyperlink_str) match = http_reg.match(hyperlink_str)
if match: if match:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment