Unverified commit a1fe24d9, authored by Harry Mellor and committed by GitHub
Browse files

Migrate docs from Sphinx to MkDocs (#18145)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent d0bc2f81
...@@ -175,7 +175,7 @@ def get_vllm_port() -> Optional[int]: ...@@ -175,7 +175,7 @@ def get_vllm_port() -> Optional[int]:
# The begin-* and end* here are used by the documentation generator # The begin-* and end* here are used by the documentation generator
# to extract the used env vars. # to extract the used env vars.
# begin-env-vars-definition # --8<-- [start:env-vars-definition]
environment_variables: dict[str, Callable[[], Any]] = { environment_variables: dict[str, Callable[[], Any]] = {
...@@ -813,7 +813,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -813,7 +813,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"), lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"),
} }
# end-env-vars-definition # --8<-- [end:env-vars-definition]
def __getattr__(name: str): def __getattr__(name: str):
......
...@@ -528,12 +528,12 @@ class RayDistributedExecutor(DistributedExecutorBase): ...@@ -528,12 +528,12 @@ class RayDistributedExecutor(DistributedExecutorBase):
ray.get(parallel_worker_tasks) ray.get(parallel_worker_tasks)
def _check_ray_cgraph_installation(self): def _check_ray_cgraph_installation(self):
import pkg_resources import importlib.metadata
from packaging import version from packaging import version
required_version = version.parse("2.43.0") required_version = version.parse("2.43.0")
current_version = version.parse( current_version = version.parse(importlib.metadata.version("ray"))
pkg_resources.get_distribution("ray").version)
if current_version < required_version: if current_version < required_version:
raise ValueError(f"Ray version {required_version} is " raise ValueError(f"Ray version {required_version} is "
f"required, but found {current_version}") f"required, but found {current_version}")
......
...@@ -681,9 +681,8 @@ class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP, ...@@ -681,9 +681,8 @@ class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP,
batch. batch.
pixel_values: The pixels in each input image. pixel_values: The pixels in each input image.
:::{seealso} Info:
{class}`Blip2ImageInputs` [Blip2ImageInputs][]
:::
""" """
if intermediate_tensors is not None: if intermediate_tensors is not None:
......
...@@ -721,9 +721,8 @@ class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -721,9 +721,8 @@ class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
batch. batch.
pixel_values: The pixels in each input image. pixel_values: The pixels in each input image.
:::{seealso} Info:
{class}`LlavaImageInputs` [LlavaImageInputs][]
:::
""" """
if intermediate_tensors is not None: if intermediate_tensors is not None:
inputs_embeds = None inputs_embeds = None
......
...@@ -551,9 +551,8 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -551,9 +551,8 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
pixel_values: The pixels in each grid patch for each input image. pixel_values: The pixels in each grid patch for each input image.
image_sizes: The original `(height, width)` for each input image. image_sizes: The original `(height, width)` for each input image.
:::{seealso} Info:
{class}`LlavaNextImageInputs` [LlavaNextImageInputs][]
:::
""" """
if intermediate_tensors is not None: if intermediate_tensors is not None:
inputs_embeds = None inputs_embeds = None
......
...@@ -559,9 +559,8 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA, ...@@ -559,9 +559,8 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
batch. batch.
pixel_values: The pixels in each input image. pixel_values: The pixels in each input image.
:::{seealso} Info:
{class}`Mistral3ImagePixelInputs` [Mistral3ImagePixelInputs][]
:::
""" """
if intermediate_tensors is not None: if intermediate_tensors is not None:
inputs_embeds = None inputs_embeds = None
......
...@@ -11,9 +11,8 @@ MULTIMODAL_REGISTRY = MultiModalRegistry() ...@@ -11,9 +11,8 @@ MULTIMODAL_REGISTRY = MultiModalRegistry()
The global {class}`~MultiModalRegistry` is used by model runners to The global {class}`~MultiModalRegistry` is used by model runners to
dispatch data processing according to the target model. dispatch data processing according to the target model.
:::{seealso} Info:
{ref}`mm-processing` {ref}`mm-processing`
:::
""" """
__all__ = [ __all__ = [
......
...@@ -289,9 +289,8 @@ class BaseMultiModalField(ABC): ...@@ -289,9 +289,8 @@ class BaseMultiModalField(ABC):
@dataclass(frozen=True) @dataclass(frozen=True)
class MultiModalBatchedField(BaseMultiModalField): class MultiModalBatchedField(BaseMultiModalField):
""" """
:::{seealso} Info:
{func}`MultiModalFieldConfig.batched` [MultiModalFieldConfig.batched][]
:::
""" """
def build_elems( def build_elems(
...@@ -320,10 +319,9 @@ class MultiModalBatchedField(BaseMultiModalField): ...@@ -320,10 +319,9 @@ class MultiModalBatchedField(BaseMultiModalField):
@dataclass(frozen=True) @dataclass(frozen=True)
class MultiModalFlatField(BaseMultiModalField): class MultiModalFlatField(BaseMultiModalField):
""" """
:::{seealso} Info:
{func}`MultiModalFieldConfig.flat` [MultiModalFieldConfig.flat][]
{func}`MultiModalFieldConfig.flat_from_sizes` [MultiModalFieldConfig.flat_from_sizes][]
:::
""" """
slices: Union[Sequence[slice], Sequence[Sequence[slice]]] slices: Union[Sequence[slice], Sequence[Sequence[slice]]]
dim: int = 0 dim: int = 0
...@@ -363,9 +361,8 @@ class MultiModalFlatField(BaseMultiModalField): ...@@ -363,9 +361,8 @@ class MultiModalFlatField(BaseMultiModalField):
@dataclass(frozen=True) @dataclass(frozen=True)
class MultiModalSharedField(BaseMultiModalField): class MultiModalSharedField(BaseMultiModalField):
""" """
:::{seealso} Info:
{func}`MultiModalFieldConfig.shared` [MultiModalFieldConfig.shared][]
:::
""" """
batch_size: int batch_size: int
...@@ -510,9 +507,8 @@ class MultiModalFieldConfig: ...@@ -510,9 +507,8 @@ class MultiModalFieldConfig:
Element 3: [[C],[C]] Element 3: [[C],[C]]
``` ```
:::{seealso} Info:
{func}`MultiModalFieldConfig.flat` [MultiModalFieldConfig.flat][]
:::
""" """
if size_per_item.ndim != 1: if size_per_item.ndim != 1:
......
...@@ -214,9 +214,8 @@ class MultiModalRegistry: ...@@ -214,9 +214,8 @@ class MultiModalRegistry:
When the model receives multi-modal data, the provided function is When the model receives multi-modal data, the provided function is
invoked to transform the data into a dictionary of model inputs. invoked to transform the data into a dictionary of model inputs.
:::{seealso} Info:
{ref}`mm-processing` {ref}`mm-processing`
:::
""" """
def wrapper(model_cls: N) -> N: def wrapper(model_cls: N) -> N:
...@@ -260,9 +259,8 @@ class MultiModalRegistry: ...@@ -260,9 +259,8 @@ class MultiModalRegistry:
""" """
Create a multi-modal processor for a specific model and tokenizer. Create a multi-modal processor for a specific model and tokenizer.
:::{seealso} Info:
{ref}`mm-processing` {ref}`mm-processing`
:::
""" """
if not model_config.is_multimodal_model: if not model_config.is_multimodal_model:
raise ValueError(f"{model_config.model} is not a multimodal model") raise ValueError(f"{model_config.model} is not a multimodal model")
......
...@@ -1926,9 +1926,8 @@ class _PlaceholderBase: ...@@ -1926,9 +1926,8 @@ class _PlaceholderBase:
We need to explicitly override each dunder method because We need to explicitly override each dunder method because
{meth}`__getattr__` is not called when they are accessed. {meth}`__getattr__` is not called when they are accessed.
:::{seealso} Info:
[Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup) [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
:::
""" """
def __getattr__(self, key: str) -> Never: def __getattr__(self, key: str) -> Never:
......
...@@ -172,10 +172,9 @@ class Worker(WorkerBase): ...@@ -172,10 +172,9 @@ class Worker(WorkerBase):
Then, it calculates the free memory that can be used for KV cache in Then, it calculates the free memory that can be used for KV cache in
bytes. bytes.
:::{tip} Tip:
You may limit the usage of GPU memory You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter. by adjusting the `gpu_memory_utilization` parameter.
:::
""" """
torch.cuda.empty_cache() torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats() torch.cuda.reset_peak_memory_stats()
......
...@@ -201,10 +201,9 @@ class HPUWorker(LocalOrDistributedWorkerBase): ...@@ -201,10 +201,9 @@ class HPUWorker(LocalOrDistributedWorkerBase):
Then, it calculates the maximum possible number of GPU and CPU blocks Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory. that can be allocated with the remaining free memory.
:::{tip} Tip:
You may limit the usage of GPU memory You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter. by adjusting the `gpu_memory_utilization` parameter.
:::
""" """
# Profile the memory usage of the model and get the maximum number of # Profile the memory usage of the model and get the maximum number of
# cache blocks that can be allocated with the remaining free memory. # cache blocks that can be allocated with the remaining free memory.
......
...@@ -234,10 +234,9 @@ class Worker(LocalOrDistributedWorkerBase): ...@@ -234,10 +234,9 @@ class Worker(LocalOrDistributedWorkerBase):
Then, it calculates the maximum possible number of GPU and CPU blocks Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory. that can be allocated with the remaining free memory.
:::{tip} Tip:
You may limit the usage of GPU memory You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter. by adjusting the `gpu_memory_utilization` parameter.
:::
""" """
# Profile the memory usage of the model and get the maximum number of # Profile the memory usage of the model and get the maximum number of
# cache blocks that can be allocated with the remaining free memory. # cache blocks that can be allocated with the remaining free memory.
......
...@@ -93,10 +93,9 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker): ...@@ -93,10 +93,9 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker):
Then, it calculates the maximum possible number of GPU and CPU blocks Then, it calculates the maximum possible number of GPU and CPU blocks
that can be allocated with the remaining free memory. that can be allocated with the remaining free memory.
:::{tip} Tip:
You may limit the usage of GPU memory You may limit the usage of GPU memory
by adjusting the `gpu_memory_utilization` parameter. by adjusting the `gpu_memory_utilization` parameter.
:::
""" """
# Profile the memory usage of the model and get the maximum number of # Profile the memory usage of the model and get the maximum number of
# cache blocks that can be allocated with the remaining free memory. # cache blocks that can be allocated with the remaining free memory.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment