Unverified commit 65097ca0, authored by Cyrus Leung, committed by GitHub
Browse files

[Doc] Add model development API Reference (#11884)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 1d967acb
...@@ -38,7 +38,7 @@ steps: ...@@ -38,7 +38,7 @@ steps:
- pip install -r requirements-docs.txt - pip install -r requirements-docs.txt
- SPHINXOPTS=\"-W\" make html - SPHINXOPTS=\"-W\" make html
# Check API reference (if it fails, you may have missing mock imports) # Check API reference (if it fails, you may have missing mock imports)
- grep \"sig sig-object py\" build/html/api/params.html - grep \"sig sig-object py\" build/html/api/inference_params.html
- label: Async Engine, Inputs, Utils, Worker Test # 24min - label: Async Engine, Inputs, Utils, Worker Test # 24min
fast_check: true fast_check: true
......
# Optional Parameters # Inference Parameters
Optional parameters for vLLM APIs. Inference parameters for vLLM APIs.
(sampling-params)= (sampling-params)=
...@@ -19,4 +19,3 @@ Optional parameters for vLLM APIs. ...@@ -19,4 +19,3 @@ Optional parameters for vLLM APIs.
.. autoclass:: vllm.PoolingParams .. autoclass:: vllm.PoolingParams
:members: :members:
``` ```
# Model Adapters
## Module Contents
```{eval-rst}
.. automodule:: vllm.model_executor.models.adapters
:members:
:member-order: bysource
```
# Model Development
## Submodules
```{toctree}
:maxdepth: 1
interfaces_base
interfaces
adapters
```
# Optional Interfaces
## Module Contents
```{eval-rst}
.. automodule:: vllm.model_executor.models.interfaces
:members:
:member-order: bysource
```
# Base Model Interfaces
## Module Contents
```{eval-rst}
.. automodule:: vllm.model_executor.models.interfaces_base
:members:
:member-order: bysource
```
...@@ -139,8 +139,9 @@ community/sponsors ...@@ -139,8 +139,9 @@ community/sponsors
api/offline_inference/index api/offline_inference/index
api/engine/index api/engine/index
api/inference_params
api/multimodal/index api/multimodal/index
api/params api/model/index
``` ```
% Design Documents: Details about vLLM internals % Design Documents: Details about vLLM internals
......
...@@ -38,13 +38,15 @@ class SupportsMultiModal(Protocol): ...@@ -38,13 +38,15 @@ class SupportsMultiModal(Protocol):
to be merged with text embeddings. to be merged with text embeddings.
The output embeddings must be one of the following formats: The output embeddings must be one of the following formats:
- A list or tuple of 2D tensors, where each tensor corresponds to - A list or tuple of 2D tensors, where each tensor corresponds to
each input multimodal data item (e.g., image). each input multimodal data item (e.g., image).
- A single 3D tensor, with the batch dimension grouping the 2D tensors. - A single 3D tensor, with the batch dimension grouping the 2D tensors.
NOTE: The returned multimodal embeddings must be in the same order as Note:
the appearances of their corresponding multimodal data item in the The returned multimodal embeddings must be in the same order as
input prompt. the appearances of their corresponding multimodal data item in the
input prompt.
""" """
... ...
...@@ -59,6 +61,7 @@ class SupportsMultiModal(Protocol): ...@@ -59,6 +61,7 @@ class SupportsMultiModal(Protocol):
) -> torch.Tensor: ) -> torch.Tensor:
... ...
@overload
def get_input_embeddings( def get_input_embeddings(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor,
......
...@@ -35,6 +35,7 @@ T_co = TypeVar("T_co", default=torch.Tensor, covariant=True) ...@@ -35,6 +35,7 @@ T_co = TypeVar("T_co", default=torch.Tensor, covariant=True)
@runtime_checkable @runtime_checkable
class VllmModel(Protocol[C_co, T_co]): class VllmModel(Protocol[C_co, T_co]):
"""The interface required for all models in vLLM."""
def __init__( def __init__(
self, self,
...@@ -97,6 +98,7 @@ def is_vllm_model( ...@@ -97,6 +98,7 @@ def is_vllm_model(
@runtime_checkable @runtime_checkable
class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]): class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]):
"""The interface required for all generative models in vLLM."""
def compute_logits( def compute_logits(
self, self,
...@@ -142,6 +144,7 @@ def is_text_generation_model( ...@@ -142,6 +144,7 @@ def is_text_generation_model(
@runtime_checkable @runtime_checkable
class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]): class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]):
"""The interface required for all pooling models in vLLM."""
def pooler( def pooler(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment