Unverified commit 4e4d017b, authored by Hyogeun Oh (오효근), committed by GitHub
Browse files

[Docs] Fix warnings in `mkdocs build` (continued) (#23743)


Signed-off-by: Zerohertz <ohg3417@gmail.com>
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com>
parent dd589322
...@@ -110,7 +110,7 @@ class StructuredOutputBackend(ABC): ...@@ -110,7 +110,7 @@ class StructuredOutputBackend(ABC):
Args: Args:
request_type (StructuredOutputOptions): The type of structured request_type (StructuredOutputOptions): The type of structured
output request. output request.
grammar_spec (str): The grammar specification to compile. grammar_spec (str): The grammar specification to compile.
Returns: Returns:
...@@ -124,7 +124,7 @@ class StructuredOutputBackend(ABC): ...@@ -124,7 +124,7 @@ class StructuredOutputBackend(ABC):
Args: Args:
max_num_seqs (int): The maximum number of sequences for which max_num_seqs (int): The maximum number of sequences for which
to allocate the bitmask. to allocate the bitmask.
""" """
@abstractmethod @abstractmethod
......
...@@ -525,9 +525,6 @@ class InputBatch: ...@@ -525,9 +525,6 @@ class InputBatch:
Any consecutive empty indices at the very end of the list are not Any consecutive empty indices at the very end of the list are not
filled. filled.
Args:
empty_req_indices: empty indices which may be filled.
Returns: Returns:
swaps: list of (from,to) swap tuples for moved requests swaps: list of (from,to) swap tuples for moved requests
empty_req_indices: indices not filled by condensation empty_req_indices: indices not filled by condensation
......
...@@ -2955,7 +2955,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -2955,7 +2955,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
Args: Args:
kv_cache_config: The KV cache config kv_cache_config: The KV cache config
kv_cache_raw_tensors: The KV cache buffer of each layer, with kv_cache_raw_tensors: The KV cache buffer of each layer, with
correct size but uninitialized shape. correct size but uninitialized shape.
Returns: Returns:
Dict[str, torch.Tensor]: A map between layer names to their Dict[str, torch.Tensor]: A map between layer names to their
corresponding memory buffer for KV cache. corresponding memory buffer for KV cache.
......
...@@ -552,7 +552,7 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -552,7 +552,7 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
return kv_cache_spec return kv_cache_spec
def _get_slot_mapping_metadata(self, num_reqs, def _get_slot_mapping_metadata(self, num_reqs,
num_scheduled_tokens_per_req): num_scheduled_tokens_per_req) -> np.ndarray:
""" """
Computes metadata for mapping slots to blocks in the key-value (KV) Computes metadata for mapping slots to blocks in the key-value (KV)
cache for a batch of requests. cache for a batch of requests.
...@@ -565,15 +565,15 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -565,15 +565,15 @@ class TPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
Args: Args:
num_reqs (int): Number of requests in the current batch. num_reqs (int): Number of requests in the current batch.
num_scheduled_tokens_per_req (int or np.ndarray): Number of tokens num_scheduled_tokens_per_req (int or np.ndarray): Number of tokens
to be scheduled for each request. to be scheduled for each request.
Returns: Returns:
np.ndarray: A 2D array of shape (total_block_len, 3), where each row np.ndarray: A 2D array of shape (total_block_len, 3), where each row
contains: contains:
- kv_cache_start_index (int): The starting index in the KV cache - kv_cache_start_index (int): The starting index in the KV cache
for the corresponding slice. for the corresponding slice.
- new_kv_start_index (int): The starting index in the new KV - new_kv_start_index (int): The starting index in the new KV
cache for the corresponding slice. cache for the corresponding slice.
- slice_len (int): The length of the slice. - slice_len (int): The length of the slice.
""" """
slices_start = self.input_batch.num_computed_tokens_cpu[:num_reqs] slices_start = self.input_batch.num_computed_tokens_cpu[:num_reqs]
......
...@@ -172,10 +172,10 @@ def scatter_mm_placeholders( ...@@ -172,10 +172,10 @@ def scatter_mm_placeholders(
Args: Args:
embeds: The multimodal embeddings. embeds: The multimodal embeddings.
Shape: `(num_embeds, embed_dim)` Shape: `(num_embeds, embed_dim)`
is_embed: A boolean mask indicating which positions in the placeholder is_embed: A boolean mask indicating which positions in the placeholder
tokens need to be filled with multimodal embeddings. tokens need to be filled with multimodal embeddings.
Shape: `(num_placeholders, num_embeds)` Shape: `(num_placeholders, num_embeds)`
""" """
if is_embed is None: if is_embed is None:
return embeds return embeds
...@@ -278,7 +278,7 @@ def bind_kv_cache( ...@@ -278,7 +278,7 @@ def bind_kv_cache(
Args: Args:
kv_caches: The allocated kv_caches with layer names as keys. kv_caches: The allocated kv_caches with layer names as keys.
forward_context: The global forward context containing all Attention forward_context: The global forward context containing all Attention
layers with layer names as keys. layers with layer names as keys.
runner_kv_caches: The kv_cache declared by ModelRunner. runner_kv_caches: The kv_cache declared by ModelRunner.
""" """
# Bind kv_caches to ModelRunner # Bind kv_caches to ModelRunner
......
...@@ -36,8 +36,8 @@ class WorkerBase(WorkerBaseV0): ...@@ -36,8 +36,8 @@ class WorkerBase(WorkerBaseV0):
local_rank: Local device index local_rank: Local device index
rank: Global rank in distributed setup rank: Global rank in distributed setup
distributed_init_method: Distributed initialization method distributed_init_method: Distributed initialization method
is_driver_worker: Whether this worker handles driver is_driver_worker: Whether this worker handles driver
responsibilities responsibilities
""" """
# Configuration storage # Configuration storage
super().__init__(vllm_config=vllm_config) super().__init__(vllm_config=vllm_config)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment