generate_examples.py 8.77 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
import itertools
4
import logging
5
6
from dataclasses import dataclass
from functools import cached_property
7
8
9
from pathlib import Path
from typing import Literal

10
11
import regex as re

12
13
# Log through the "mkdocs" logger so that messages emitted here show up in the
# MkDocs build output.
logger = logging.getLogger("mkdocs")

# Walk four levels up from this file to find the root directory.
ROOT_DIR = Path(__file__).parent.parent.parent.parent
ROOT_DIR_RELATIVE = "../../../../.."

# Where the example sources are read from and where the generated pages go.
EXAMPLE_DIR = ROOT_DIR.joinpath("examples")
EXAMPLE_DOC_DIR = ROOT_DIR.joinpath("docs", "examples")
18
19


20
21
22
23
def _upper_match(match) -> str:
    """Return the matched text upper-cased (e.g. ``Fp16`` -> ``FP16``)."""
    return match.group(0).upper()


# Custom substitutions applied on top of ``str.title()``. Keys are regex
# fragments that are matched as whole words, case-insensitively; values are
# either a replacement template or a callable receiving the match object.
_TITLE_SUBSTITUTIONS = {
    "io": "IO",
    "rl": "RL",
    "api(s?)": r"API\1",
    "cli": "CLI",
    "cpu": "CPU",
    "ipc": "IPC",
    "llm": "LLM",
    "mae": "MAE",
    "ner": "NER",
    "tpu": "TPU",
    "gguf": "GGUF",
    "lora": "LoRA",
    "nccl": "NCCL",
    "rlhf": "RLHF",
    "vllm": "vLLM",
    "openai": "OpenAI",
    "lmcache": "LMCache",
    "multilora": "MultiLoRA",
    "mlpspeculator": "MLPSpeculator",
    r"fp\d+": _upper_match,  # e.g. fp16, fp32
    r"int\d+": _upper_match,  # e.g. int8, int16
}

# Compiled once at import time so that repeated calls to ``title`` do not
# rebuild the table and its patterns.
_TITLE_PATTERNS = [
    (re.compile(rf"\b{pattern}\b", flags=re.IGNORECASE), repl)
    for pattern, repl in _TITLE_SUBSTITUTIONS.items()
]


def title(text: str) -> str:
    """Convert a file or directory name into a human readable title.

    Underscores become spaces, path separators (``/``) become `` - `` and the
    result is title-cased. Known acronyms and product names are then restored
    to their canonical spelling (e.g. ``Openai`` -> ``OpenAI``).

    Args:
        text: The raw name, e.g. ``"openai_api"``.

    Returns:
        The formatted title, e.g. ``"OpenAI API"``.
    """
    # Default title case
    text = text.replace("_", " ").replace("/", " - ").title()
    # Custom substitutions, applied in declaration order
    for pattern, repl in _TITLE_PATTERNS:
        text = pattern.sub(repl, text)
    return text


@dataclass
class Example:
    """
    Example class for generating documentation content from a given path.

    Attributes:
        path (Path): The path to the main directory or file.
        category (str): The category of the document.
60
61
62
63
64
65

    Properties::
        main_file() -> Path | None: Determines the main file in the given path.
        other_files() -> list[Path]: Determines other files in the directory excluding
        the main file.
        title() -> str: Determines the title of the document.
66
67
68

    Methods:
        generate() -> str: Generates the documentation content.
69
    """
70

71
    path: Path
72
    category: str
73

74
75
76
    @cached_property
    def main_file(self) -> Path | None:
        """Determines the main file in the given path.
77

78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
        If path is a file, it returns the path itself. If path is a directory, it
        searches for Markdown files (*.md) in the directory and returns the first one
        found. If no Markdown files are found, it returns None."""
        # Single file example
        if self.path.is_file():
            return self.path
        # Multi file example with a README
        if md_paths := list(self.path.glob("*.md")):
            return md_paths[0]
        # Multi file example without a README
        return None

    @cached_property
    def other_files(self) -> list[Path]:
        """Determine other files in the directory excluding the main file.

        If path is a file, it returns an empty list. Otherwise, it returns every file
        in the directory except the main file in a list."""
        # Single file example
97
98
        if self.path.is_file():
            return []
99
        # Multi file example
100
        is_other_file = lambda file: file.is_file() and file != self.main_file
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
        return sorted(file for file in self.path.rglob("*") if is_other_file(file))

    @cached_property
    def is_code(self) -> bool:
        return self.main_file is not None and self.main_file.suffix != ".md"

    @cached_property
    def title(self) -> str:
        # Generate title from filename if no main md file found
        if self.main_file is None or self.is_code:
            return title(self.path.stem)
        # Specify encoding for building on Windows
        with open(self.main_file, encoding="utf-8") as f:
            first_line = f.readline().strip()
        match = re.match(r"^#\s+(?P<title>.+)$", first_line)
        if match:
            return match.group("title")
        raise ValueError(f"Title not found in {self.main_file}")
119

120
121
122
123
    def fix_relative_links(self, content: str) -> str:
        """
        Fix relative links in markdown content by converting them to gh-file
        format.
124

125
126
        Args:
            content (str): The markdown content to process
127

128
129
130
131
132
        Returns:
            str: Content with relative links converted to gh-file format
        """
        # Regex to match markdown links [text](relative_path)
        # This matches links that don't start with http, https, ftp, or #
133
        link_pattern = r"\[([^\]]*)\]\((?!(?:https?|ftp)://|#)([^)]+)\)"
134
135
136
137
138
139
140
141
142

        def replace_link(match):
            link_text = match.group(1)
            relative_path = match.group(2)

            # Make relative to repo root
            gh_file = (self.main_file.parent / relative_path).resolve()
            gh_file = gh_file.relative_to(ROOT_DIR)

143
144
145
146
147
148
            # Make GitHub URL
            url = "https://github.com/vllm-project/vllm/"
            url += "tree/main" if self.path.is_dir() else "blob/main"
            gh_url = f"{url}/{gh_file}"

            return f"[{link_text}]({gh_url})"
149
150
151

        return re.sub(link_pattern, replace_link, content)

152
    def generate(self) -> str:
153
        content = f"# {self.title}\n\n"
154
155
156
        url = "https://github.com/vllm-project/vllm/"
        url += "tree/main" if self.path.is_dir() else "blob/main"
        content += f"Source <{url}/{self.path.relative_to(ROOT_DIR)}>.\n\n"
157

158
159
160
        # Use long code fence to avoid issues with
        # included files containing code fences too
        code_fence = "``````"
161

162
163
164
165
166
167
168
169
170
171
172
173
174
175
        if self.main_file is not None:
            # Single file example or multi file example with a README
            if self.is_code:
                content += (
                    f"{code_fence}{self.main_file.suffix[1:]}\n"
                    f'--8<-- "{self.main_file}"\n'
                    f"{code_fence}\n"
                )
            else:
                with open(self.main_file, encoding="utf-8") as f:
                    # Skip the title from md snippets as it's been included above
                    main_content = f.readlines()[1:]
                content += self.fix_relative_links("".join(main_content))
            content += "\n"
176
        else:
177
178
179
180
181
182
183
184
            # Multi file example without a README
            for file in self.other_files:
                file_title = title(str(file.relative_to(self.path).with_suffix("")))
                content += f"## {file_title}\n\n"
                content += (
                    f'{code_fence}{file.suffix[1:]}\n--8<-- "{file}"\n{code_fence}\n\n'
                )
            return content
185
186
187
188
189

        if not self.other_files:
            return content

        content += "## Example materials\n\n"
190
        for file in self.other_files:
191
192
            content += f'??? abstract "{file.relative_to(self.path)}"\n'
            if file.suffix != ".md":
193
                content += f"    {code_fence}{file.suffix[1:]}\n"
194
195
            content += f'    --8<-- "{file}"\n'
            if file.suffix != ".md":
196
                content += f"    {code_fence}\n"
197
198
199
200
201

        return content


def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
    """Generate the example documentation pages under EXAMPLE_DOC_DIR.

    Every sub-directory of EXAMPLE_DIR is treated as a category. Within a
    category, each ``*.py``/``*.md``/``*.sh`` file is a single file example and
    each immediate sub-directory containing at least one such file is a multi
    file example. One Markdown page is written per example to
    ``EXAMPLE_DOC_DIR / <category> / <example name>.md``.

    Args:
        command: The command being run. Not used by this function.
        dirty: Not used by this function.
    """
    # Monkey-patch dirname_to_title in awesome-nav so that sub-directory names are
    # title-cased (e.g. "Offline Inference" instead of "Offline inference").
    import mkdocs_awesome_nav.nav.directory as _nav_dir

    _nav_dir.dirname_to_title = title

    logger.info("Generating example documentation")
    logger.debug("Root directory: %s", ROOT_DIR.resolve())
    logger.debug("Example directory: %s", EXAMPLE_DIR.resolve())
    logger.debug("Example document directory: %s", EXAMPLE_DOC_DIR.resolve())

    # Create the EXAMPLE_DOC_DIR if it doesn't exist
    EXAMPLE_DOC_DIR.mkdir(parents=True, exist_ok=True)

    categories = sorted(p for p in EXAMPLE_DIR.iterdir() if p.is_dir())

    examples = []
    glob_patterns = ["*.py", "*.md", "*.sh"]
    # Find categorised examples
    for category in categories:
        logger.info("Processing category: %s", category.stem)
        single_files = itertools.chain.from_iterable(
            category.glob(pattern) for pattern in glob_patterns
        )
        for path in single_files:
            examples.append(Example(path, category.stem))
        # Find examples in subdirectories
        nested_files = itertools.chain.from_iterable(
            category.glob(f"*/{pattern}") for pattern in glob_patterns
        )
        # A directory usually holds several matching files; register it once
        # so it is not generated repeatedly and the total count stays accurate.
        seen_dirs = set()
        for path in nested_files:
            example_dir = path.parent
            if example_dir in seen_dirs:
                continue
            seen_dirs.add(example_dir)
            examples.append(Example(example_dir, category.stem))

    # Generate the example documentation
    for example in sorted(examples, key=lambda e: e.path.stem):
        example_name = f"{example.path.stem}.md"
        doc_path = EXAMPLE_DOC_DIR / example.category / example_name
        doc_path.parent.mkdir(parents=True, exist_ok=True)
        # Specify encoding for building on Windows
        with open(doc_path, "w", encoding="utf-8") as f:
            f.write(example.generate())
        logger.debug("Example generated: %s", doc_path.relative_to(ROOT_DIR))
    logger.info("Total examples generated: %d", len(examples))