"vscode:/vscode.git/clone" did not exist on "994acec0cc9d6348268b5f371c66239fe75f928d"
Unverified Commit a1fe24d9 authored by Harry Mellor's avatar Harry Mellor Committed by GitHub
Browse files

Migrate docs from Sphinx to MkDocs (#18145)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent d0bc2f81
......@@ -175,7 +175,7 @@ def get_vllm_port() -> Optional[int]:
# The begin-* and end* here are used by the documentation generator
# to extract the used env vars.
# begin-env-vars-definition
# --8<-- [start:env-vars-definition]
environment_variables: dict[str, Callable[[], Any]] = {
......@@ -813,7 +813,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"),
}
# end-env-vars-definition
# --8<-- [end:env-vars-definition]
def __getattr__(name: str):
......
......@@ -528,12 +528,12 @@ class RayDistributedExecutor(DistributedExecutorBase):
ray.get(parallel_worker_tasks)
def _check_ray_cgraph_installation(self):
import pkg_resources
import importlib.metadata
from packaging import version
required_version = version.parse("2.43.0")
current_version = version.parse(
pkg_resources.get_distribution("ray").version)
current_version = version.parse(importlib.metadata.version("ray"))
if current_version < required_version:
raise ValueError(f"Ray version {required_version} is "
f"required, but found {current_version}")
......
......@@ -681,9 +681,8 @@ class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP,
batch.
pixel_values: The pixels in each input image.
:::{seealso}
{class}`Blip2ImageInputs`
:::
Info:
[Blip2ImageInputs][]
"""
if intermediate_tensors is not None:
......
......@@ -721,9 +721,8 @@ class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
batch.
pixel_values: The pixels in each input image.
:::{seealso}
{class}`LlavaImageInputs`
:::
Info:
[LlavaImageInputs][]
"""
if intermediate_tensors is not None:
inputs_embeds = None
......
......@@ -551,9 +551,8 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
pixel_values: The pixels in each grid patch for each input image.
image_sizes: The original `(height, width)` for each input image.
:::{seealso}
{class}`LlavaNextImageInputs`
:::
Info:
[LlavaNextImageInputs][]
"""
if intermediate_tensors is not None:
inputs_embeds = None
......
......@@ -559,9 +559,8 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
batch.
pixel_values: The pixels in each input image.
:::{seealso}
{class}`Mistral3ImagePixelInputs`
:::
Info:
[Mistral3ImagePixelInputs][]
"""
if intermediate_tensors is not None:
inputs_embeds = None
......
......@@ -11,9 +11,8 @@ MULTIMODAL_REGISTRY = MultiModalRegistry()
The global {class}`~MultiModalRegistry` is used by model runners to
dispatch data processing according to the target model.
:::{seealso}
{ref}`mm-processing`
:::
Info:
{ref}`mm-processing`
"""
__all__ = [
......
......@@ -289,9 +289,8 @@ class BaseMultiModalField(ABC):
@dataclass(frozen=True)
class MultiModalBatchedField(BaseMultiModalField):
"""
:::{seealso}
{func}`MultiModalFieldConfig.batched`
:::
Info:
[MultiModalFieldConfig.batched][]
"""
def build_elems(
......@@ -320,10 +319,9 @@ class MultiModalBatchedField(BaseMultiModalField):
@dataclass(frozen=True)
class MultiModalFlatField(BaseMultiModalField):
"""
:::{seealso}
{func}`MultiModalFieldConfig.flat`
{func}`MultiModalFieldConfig.flat_from_sizes`
:::
Info:
[MultiModalFieldConfig.flat][]
[MultiModalFieldConfig.flat_from_sizes][]
"""
slices: Union[Sequence[slice], Sequence[Sequence[slice]]]
dim: int = 0
......@@ -363,9 +361,8 @@ class MultiModalFlatField(BaseMultiModalField):
@dataclass(frozen=True)
class MultiModalSharedField(BaseMultiModalField):
"""
:::{seealso}
{func}`MultiModalFieldConfig.shared`
:::
Info:
[MultiModalFieldConfig.shared][]
"""
batch_size: int
......@@ -510,9 +507,8 @@ class MultiModalFieldConfig:
Element 3: [[C],[C]]
```
:::{seealso}
{func}`MultiModalFieldConfig.flat`
:::
Info:
[MultiModalFieldConfig.flat][]
"""
if size_per_item.ndim != 1:
......
......@@ -214,9 +214,8 @@ class MultiModalRegistry:
When the model receives multi-modal data, the provided function is
invoked to transform the data into a dictionary of model inputs.
:::{seealso}
{ref}`mm-processing`
:::
Info:
{ref}`mm-processing`
"""
def wrapper(model_cls: N) -> N:
......@@ -260,9 +259,8 @@ class MultiModalRegistry:
"""
Create a multi-modal processor for a specific model and tokenizer.
:::{seealso}
{ref}`mm-processing`
:::
Info:
{ref}`mm-processing`
"""
if not model_config.is_multimodal_model:
raise ValueError(f"{model_config.model} is not a multimodal model")
......
......@@ -1926,9 +1926,8 @@ class _PlaceholderBase:
We need to explicitly override each dunder method because
{meth}`__getattr__` is not called when they are accessed.
:::{seealso}
[Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
:::
Info:
[Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
"""
def __getattr__(self, key: str) -> Never:
......
......@@ -172,10 +172,9 @@ class Worker(WorkerBase):
Then, it calculates the free memory that can be used for KV cache in
bytes.
:::{tip}
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
:::
Tip:
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
"""
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
......
......@@ -201,10 +201,9 @@ class HPUWorker(LocalOrDistributedWorkerBase):
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
:::{tip}
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
:::
Tip:
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
"""
# Profile the memory usage of the model and get the maximum number of
# cache blocks that can be allocated with the remaining free memory.
......
......@@ -234,10 +234,9 @@ class Worker(LocalOrDistributedWorkerBase):
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
:::{tip}
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
:::
Tip:
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
"""
# Profile the memory usage of the model and get the maximum number of
# cache blocks that can be allocated with the remaining free memory.
......
......@@ -93,10 +93,9 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker):
Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory.
:::{tip}
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
:::
Tip:
You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter.
"""
# Profile the memory usage of the model and get the maximum number of
# cache blocks that can be allocated with the remaining free memory.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment