# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
4
5
MkDocs hook + markdown extension to enable the following links to render correctly,
including inside content included via pymdownx.snippets:
6

7
8
9
10
11
12
13
14
- Relative file links outside of the `docs/` directory, e.g.:
    - [Text](../some_file.py)
    - [Directory](../../some_directory/)
- GitHub URLs for issues, pull requests, and projects, e.g.:
    - Adds GitHub icon before links
    - Replaces raw links with descriptive text,
        e.g. <...pull/123> -> [Pull Request #123](.../pull/123)
    - Works for external repos too by including the `owner/repo` in the link title
15

16
17
18
19
The link replacement runs as a markdown preprocessor (priority 25) so that it executes
after pymdownx.snippets (priority 32) has expanded all included content.
The on_page_markdown hook passes the current page context to the preprocessor before
each page is converted.
20
21
"""

22
23
from pathlib import Path

24
import regex as re
25
26
from markdown import Extension
from markdown.preprocessors import Preprocessor
27
28
29
30
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page

31
32
33
34
35
36
37
38
39
40
# Repository root (four directory levels above this file) and the docs source
# directory inside it.
ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
DOC_DIR = ROOT_DIR / "docs"

# Icon shortcode placed in front of the title of every rewritten link.
gh_icon = ":octicons-mark-github-16:"

# Regex pieces
TITLE = r"(?P<title>[^\[\]<>]+?)"
# Exactly two path segments (e.g. `owner/name`). Neither segment may contain
# whitespace, "/" or link delimiters, so a match can never start in one link
# and swallow the text up to a later GitHub URL on the same line.
REPO = r"(?P<repo>[^\s/<>()\[\]]+/[^\s/<>()\[\]]+)"
TYPE = r"(?P<type>issues|pull|projects)"
NUMBER = r"(?P<number>\d+)"
PATH = r"(?P<path>[^\s]+?)"
FRAGMENT = r"(?P<fragment>#[^\s]+)?"
# The dot is escaped so that only the literal host `github.com` matches.
URL = rf"https://github\.com/{REPO}/{TYPE}/{NUMBER}{FRAGMENT}"
# Anything that is not an absolute http(s)/ftp URL or a pure `#fragment` link.
RELATIVE = rf"(?!(https?|ftp)://|#){PATH}{FRAGMENT}"

# Common titles to use for GitHub links when none is provided in the link.
TITLES = {"issues": "Issue ", "pull": "Pull Request ", "projects": "Project "}

# Regex to match GitHub issue, PR, and project links with optional titles.
github_link = re.compile(rf"(\[{TITLE}\]\(|<){URL}(\)|>)")
# Regex to match relative file links with optional titles.
relative_link = re.compile(rf"\[{TITLE}\]\({RELATIVE}\)")

54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
class UrlSchemesPreprocessor(Preprocessor):
    """Preprocessor that runs after pymdownx.snippets to process all links.

    The page currently being converted is read from the ``page`` attribute of
    the extension object passed to the constructor.
    """

    def __init__(self, md, ext):
        """Keep a reference to the extension that carries the current page.

        Args:
            md: The Markdown instance this preprocessor is attached to.
            ext: Object exposing a ``page`` attribute (read in ``run``).
        """
        super().__init__(md)
        self.ext = ext

    def run(self, lines):
        """Rewrite relative-file and GitHub links in the given markdown lines.

        Args:
            lines: The document as a list of lines.

        Returns:
            The processed document as a list of lines. ``lines`` is returned
            untouched when no page (or no source path for it) is available.
        """
        page = self.ext.page
        if page is None or getattr(page.file, "abs_src_path", None) is None:
            return lines

        # Relative links are resolved against the directory of the page source.
        src_dir = Path(page.file.abs_src_path).parent

        def replace_relative_link(match: re.Match) -> str:
            """
            Replace relative file links with URLs if they point outside the docs dir.
            """
            title = match.group("title")
            fragment = match.group("fragment") or ""
            target = (src_dir / match.group("path")).resolve()

            # Leave the link alone if the target is missing or lives in the docs dir
            if not target.exists() or target.is_relative_to(DOC_DIR):
                return match.group(0)

            # A target outside the repository (e.g. a link to "/") has no GitHub
            # URL; without this guard relative_to() below raises ValueError.
            if not target.is_relative_to(ROOT_DIR):
                return match.group(0)

            # Files and directories have different URL schemes on GitHub
            slug = "tree/main" if target.is_dir() else "blob/main"

            # as_posix() keeps forward slashes in the URL on every platform
            repo_path = target.relative_to(ROOT_DIR).as_posix()
            url = f"https://github.com/vllm-project/vllm/{slug}/{repo_path}{fragment}"
            return f"[{gh_icon} {title}]({url})"

        def replace_github_link(match: re.Match) -> str:
            """
            Replace GitHub issue, PR, and project links with enhanced Markdown links.
            """
            repo = match.group("repo")
            link_type = match.group("type")
            number = match.group("number")
            # Title and fragment could be None
            title = match.group("title") or ""
            fragment = match.group("fragment") or ""

            # Use default titles for raw links
            if not title:
                title = TITLES[link_type]
                # Name the repository in the title unless it is a vllm-project one
                if "vllm-project" not in repo:
                    title += repo
                title += f"#{number}"

            url = f"https://github.com/{repo}/{link_type}/{number}{fragment}"
            return f"[{gh_icon} {title}]({url})"

        # Work on the whole document at once, then hand back a list of lines.
        markdown = "\n".join(lines)
        markdown = relative_link.sub(replace_relative_link, markdown)
        markdown = github_link.sub(replace_github_link, markdown)
        return markdown.split("\n")


class UrlSchemesExtension(Extension):
    """Markdown extension that installs the URL schemes preprocessor.

    The ``page`` attribute starts out as ``None``; the preprocessor reads it
    to learn which page is being converted.
    """

    def __init__(self, **kwargs):
        # No page is known until something assigns one to this attribute.
        self.page = None
        super().__init__(**kwargs)

    def extendMarkdown(self, md):
        # Priority 25 runs after pymdownx.snippets (priority 32)
        preprocessor = UrlSchemesPreprocessor(md, self)
        md.preprocessors.register(preprocessor, "url_schemes", 25)


# Singleton extension instance shared between the hook and the preprocessor:
# on_config registers it, on_page_markdown stores the current page on it, and
# the preprocessor reads that page back through its `ext` reference.
_ext = UrlSchemesExtension()


def on_config(config: MkDocsConfig) -> MkDocsConfig:
    """Add the shared URL schemes extension to the markdown extension list.

    The module-level ``_ext`` instance is appended to
    ``config["markdown_extensions"]`` and the same config object is returned.
    """
    extensions = config["markdown_extensions"]
    extensions.append(_ext)
    return config


136
137
138
def on_page_markdown(
    markdown: str, *, page: Page, config: MkDocsConfig, files: Files
) -> str:
    """Pass the current page context to the preprocessor.

    The page is stored on the shared ``_ext`` instance. ``config`` and
    ``files`` are accepted but not used, and the markdown text is returned
    unchanged.
    """
    _ext.page = page
    return markdown