# generate_examples.py
1
2
# SPDX-License-Identifier: Apache-2.0

3
import itertools
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

8
9
10
11
12
# Directory three levels above this file (`.parent` applied three times),
# resolved to an absolute path.
ROOT_DIR = Path(__file__).parent.parent.parent.resolve()
# Relative prefix that Example.generate() puts in front of include targets.
# Its four ".." segments match the four path components that separate
# EXAMPLE_DOC_DIR from ROOT_DIR.
ROOT_DIR_RELATIVE = '../../../..'
# Directory that generate_examples() scans for example sources.
EXAMPLE_DIR = ROOT_DIR / "examples"
# Directory that generate_examples() writes the generated pages and indices to.
EXAMPLE_DOC_DIR = ROOT_DIR / "docs/source/getting_started/examples"

13
14

def fix_case(text: str) -> str:
    """
    Restore the canonical capitalisation of known acronyms and product names.

    Every key of ``subs`` is a regular-expression fragment that is matched
    case-insensitively and only as a whole word (it is wrapped in ``\\b``
    anchors). Each match is replaced by the mapped string, or by the result
    of the mapped callable when the value is a function.

    Args:
        text (str): The text to normalise, e.g. a title-cased file stem.

    Returns:
        str: The text with every known term rewritten in its canonical case.
    """
    subs = {
        "api": "API",
        "cli": "CLI",
        "cpu": "CPU",
        "llm": "LLM",
        "mae": "MAE",
        "tpu": "TPU",
        "aqlm": "AQLM",
        "gguf": "GGUF",
        "lora": "LoRA",
        "rlhf": "RLHF",
        "vllm": "vLLM",
        "openai": "OpenAI",
        "lmcache": "LMCache",
        "multilora": "MultiLoRA",
        "mlpspeculator": "MLPSpeculator",
        r"fp\d+": lambda x: x.group(0).upper(),  # e.g. fp16, fp32
        r"int\d+": lambda x: x.group(0).upper(),  # e.g. int8, int16
    }
    for pattern, repl in subs.items():
        # The word boundaries keep e.g. "api" from matching inside "Capital".
        text = re.sub(rf'\b{pattern}\b', repl, text, flags=re.IGNORECASE)
    return text


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
@dataclass
class Index:
    """
    Index class to generate a structured document index.

    Attributes:
        path (Path): The path to save the index file to.
        title (str): The title of the index.
        description (str): A brief description of the index.
        caption (str): The caption for the table of contents.
        maxdepth (int): The maximum depth of the table of contents.
            Defaults to 1.
        documents (list[str]): A list of document paths to include in the
            index. Defaults to an empty list.

    Methods:
        generate() -> str:
            Generates the index content as a string in the specified format.
    """
    path: Path
    title: str
    description: str
    caption: str
    maxdepth: int = 1
    documents: list[str] = field(default_factory=list)

    def generate(self) -> str:
        """
        Render the index page.

        Returns:
            str: A heading and description followed by a ``toctree``
            directive that lists ``documents`` one per line.
        """
        content = f"# {self.title}\n\n{self.description}\n\n"
        content += ":::{toctree}\n"
        content += f":caption: {self.caption}\n:maxdepth: {self.maxdepth}\n"
        content += "\n".join(self.documents) + "\n:::\n"
        return content


@dataclass
class Example:
    """
    Example class for generating documentation content from a given path.

    Attributes:
        path (Path): The path to the main directory or file.
        category (Optional[str]): The category of the document, or None when
            the example is uncategorised.
        main_file (Path): The main file in the directory.
        other_files (list[Path]): list of other files in the directory.
        title (str): The title of the document.

    Methods:
        __post_init__(): Initializes the main_file, other_files, and title
            attributes.
        determine_main_file() -> Path: Determines the main file in the given
            path.
        determine_other_files() -> list[Path]: Determines other files in the
            directory excluding the main file.
        determine_title() -> str: Determines the title of the document.
        generate() -> str: Generates the documentation content.
    """
    path: Path
    category: Optional[str] = None
    main_file: Path = field(init=False)
    other_files: list[Path] = field(init=False)
    title: str = field(init=False)

    def __post_init__(self):
        # Order matters: determine_other_files() compares against main_file.
        self.main_file = self.determine_main_file()
        self.other_files = self.determine_other_files()
        self.title = self.determine_title()

    def determine_main_file(self) -> Path:
        """
        Determines the main file in the given path.

        If the path is a file, it returns the path itself. Otherwise, it
        searches for Markdown files (*.md) directly inside the directory and
        returns the first one in sorted order, so the choice does not depend
        on filesystem enumeration order.

        Returns:
            Path: The main file path, either the original path if it's a file
            or the first Markdown file found in the directory.

        Raises:
            IndexError: If no Markdown files are found in the directory.
        """
        if self.path.is_file():
            return self.path
        return sorted(self.path.glob("*.md"))[0]

    def determine_other_files(self) -> list[Path]:
        """
        Determine other files in the directory excluding the main file.

        If the given path is a file, an empty list is returned. Otherwise the
        directory is searched recursively and every regular file that is not
        the main file is collected.

        Returns:
            list[Path]: The other files in the directory.
        """
        if self.path.is_file():
            return []
        return [
            file for file in self.path.rglob("*")
            if file.is_file() and file != self.main_file
        ]

    def determine_title(self) -> str:
        """
        Determines the title of the document.

        Returns:
            str: The stem of ``path`` with underscores replaced by spaces,
            title-cased and then passed through ``fix_case``.
        """
        return fix_case(self.path.stem.replace("_", " ").title())

    def generate(self) -> str:
        """
        Generates the documentation content.

        Returns:
            str: A page that links to the source, includes the main file
            (``include`` for Markdown, ``literalinclude`` otherwise) and,
            when there are other files, adds one dropdown admonition per
            file under an "Example materials" heading.
        """

        def make_relative(path: Path) -> Path:
            # Re-root the path under ROOT_DIR_RELATIVE so the include target
            # is relative rather than absolute.
            return ROOT_DIR_RELATIVE / path.relative_to(ROOT_DIR)

        content = f"Source <gh-file:{self.path.relative_to(ROOT_DIR)}>.\n\n"
        include = ("include"
                   if self.main_file.suffix == ".md" else "literalinclude")
        # Only non-Markdown main files get a generated heading here.
        if include == "literalinclude":
            content += f"# {self.title}\n\n"
        content += f":::{{{include}}} {make_relative(self.main_file)}\n"
        if include == "literalinclude":
            content += f":language: {self.main_file.suffix[1:]}\n"
        content += ":::\n\n"

        if not self.other_files:
            return content

        content += "## Example materials\n\n"
        for file in sorted(self.other_files):
            include = "include" if file.suffix == ".md" else "literalinclude"
            content += f":::{{admonition}} {file.relative_to(self.path)}\n"
            content += ":class: dropdown\n\n"
            content += f":::{{{include}}} {make_relative(file)}\n:::\n"
            content += ":::\n\n"

        return content
161
162
163


def generate_examples():
    """
    Generate one documentation page per example plus the index pages.

    Examples are collected from EXAMPLE_DIR: ``*.py``, ``*.md`` and ``*.sh``
    files directly inside each category directory and inside EXAMPLE_DIR
    itself, plus every subdirectory that directly contains a Markdown file.
    Each example is rendered to ``EXAMPLE_DOC_DIR/<stem>.md`` and registered
    in the index of its category, or in the top-level index when it has no
    category. Category indices that received at least one document are
    written and linked from the top-level index, which is written last.
    """
    # Create the EXAMPLE_DOC_DIR if it doesn't exist
    EXAMPLE_DOC_DIR.mkdir(parents=True, exist_ok=True)

    # Create empty indices
    examples_index = Index(
        path=EXAMPLE_DOC_DIR / "examples_index.md",
        title="Examples",
        description=
        "A collection of examples demonstrating usage of vLLM.\nAll documented examples are autogenerated using <gh-file:docs/source/generate_examples.py> from examples found in <gh-file:examples>.",  # noqa: E501
        caption="Examples",
        maxdepth=2)
    # Category indices stored in reverse order because they are inserted into
    # examples_index.documents at index 0 in order
    category_indices = {
        "other":
        Index(
            path=EXAMPLE_DOC_DIR / "examples_other_index.md",
            title="Other",
            description=
            "Other examples that don't strongly fit into the online or offline serving categories.",  # noqa: E501
            caption="Examples",
        ),
        "online_serving":
        Index(
            path=EXAMPLE_DOC_DIR / "examples_online_serving_index.md",
            title="Online Serving",
            description=
            "Online serving examples demonstrate how to use vLLM in an online setting, where the model is queried for predictions in real-time.",  # noqa: E501
            caption="Examples",
        ),
        "offline_inference":
        Index(
            path=EXAMPLE_DOC_DIR / "examples_offline_inference_index.md",
            title="Offline Inference",
            description=
            "Offline inference examples demonstrate how to use vLLM in an offline setting, where the model is queried for predictions in batches. We recommend starting with <project:basic.md>.",  # noqa: E501
            caption="Examples",
        ),
    }

    examples = []
    glob_patterns = ["*.py", "*.md", "*.sh"]
    # Find categorised examples
    for category in category_indices:
        category_dir = EXAMPLE_DIR / category
        globs = [category_dir.glob(pattern) for pattern in glob_patterns]
        for path in itertools.chain(*globs):
            examples.append(Example(path, category))
        # Find examples in subdirectories. A subdirectory holding several
        # Markdown files must still produce a single example, so collapse the
        # matches to their unique parents (first-seen order is kept).
        example_dirs = dict.fromkeys(
            path.parent for path in category_dir.glob("*/*.md"))
        for example_dir in example_dirs:
            examples.append(Example(example_dir, category))
    # Find uncategorised examples
    globs = [EXAMPLE_DIR.glob(pattern) for pattern in glob_patterns]
    for path in itertools.chain(*globs):
        examples.append(Example(path))
    # Find examples in subdirectories, deduplicated as above
    example_dirs = dict.fromkeys(
        path.parent for path in EXAMPLE_DIR.glob("*/*.md"))
    for example_dir in example_dirs:
        # Skip categorised examples
        if example_dir.name in category_indices:
            continue
        examples.append(Example(example_dir))

    # Generate the example documentation
    for example in sorted(examples, key=lambda e: e.path.stem):
        doc_path = EXAMPLE_DOC_DIR / f"{example.path.stem}.md"
        # Explicit encoding keeps the output independent of the locale.
        doc_path.write_text(example.generate(), encoding="utf-8")
        # Add the example to the appropriate index
        index = category_indices.get(example.category, examples_index)
        index.documents.append(example.path.stem)

    # Generate the index files
    for category_index in category_indices.values():
        if category_index.documents:
            examples_index.documents.insert(0, category_index.path.name)
            category_index.path.write_text(category_index.generate(),
                                           encoding="utf-8")

    examples_index.path.write_text(examples_index.generate(),
                                   encoding="utf-8")