Unverified commit e2b1f863, authored by Didier Durand, committed by GitHub
Browse files

[Doc]: fixing doc typos (#24635)


Signed-off-by: Didier Durand <durand.didier@gmail.com>
parent 41329a0f
...@@ -3881,7 +3881,7 @@ def contains_object_print(text): ...@@ -3881,7 +3881,7 @@ def contains_object_print(text):
Check if the text looks like a printed Python object, e.g. Check if the text looks like a printed Python object, e.g.
contains any substring matching the pattern: "at 0xFFFFFFF>" contains any substring matching the pattern: "at 0xFFFFFFF>"
We match against 0x followed by 2-16 hex chars (there's We match against 0x followed by 2-16 hex chars (there's
a max of 16 on a 64 bit system). a max of 16 on a 64-bit system).
Args: Args:
text (str): The text to check text (str): The text to check
......
...@@ -60,7 +60,7 @@ class Internlm2ToolParser(ToolParser): ...@@ -60,7 +60,7 @@ class Internlm2ToolParser(ToolParser):
if '<|action_start|>' not in current_text: if '<|action_start|>' not in current_text:
self.position = len(current_text) self.position = len(current_text)
return DeltaMessage(content=delta_text) return DeltaMessage(content=delta_text)
# if the tool call is sended, return an empty delta message # if the tool call is sent, return an empty delta message
# to make sure the finish_reason will be sent correctly. # to make sure the finish_reason will be sent correctly.
if self.current_tool_id > 0: if self.current_tool_id > 0:
return DeltaMessage(content='') return DeltaMessage(content='')
......
...@@ -502,7 +502,7 @@ def _chunk_state_varlen_kernel( ...@@ -502,7 +502,7 @@ def _chunk_state_varlen_kernel(
dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize
# If the sequence starts after the last chunk idx, we don't need to add the contribution from the last chunk # If the sequence starts after the last chunk idx, we don't need to add the contribution from the last chunk
# If HAS_INITSTATES==True need to consider two possiblties # If HAS_INITSTATES==True need to consider two possibilities
# - if start_idx < pid_c * chunk_size, then we need to take the past_states_ptrs # - if start_idx < pid_c * chunk_size, then we need to take the past_states_ptrs
# - if state_idx >= pid * chunk_size, then we need to insert initstates # - if state_idx >= pid * chunk_size, then we need to insert initstates
if ((start_idx < pid_c * chunk_size) # first chunk if ((start_idx < pid_c * chunk_size) # first chunk
......
...@@ -342,7 +342,7 @@ class ArceeModel(nn.Module): ...@@ -342,7 +342,7 @@ class ArceeModel(nn.Module):
class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"""Arcee Model for causal language modeling, integrated with vLLM """Arcee Model for causal language modeling, integrated with vLLM
runtime.""" runtime."""
# Map fused module names to their sub-module components # Map fused module names to their submodule components
# (for quantization and LoRA) # (for quantization and LoRA)
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],
......
...@@ -835,7 +835,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -835,7 +835,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal,
return None return None
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary
......
...@@ -1350,7 +1350,7 @@ class Phi4MultimodalForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal): ...@@ -1350,7 +1350,7 @@ class Phi4MultimodalForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
return None return None
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary
......
...@@ -100,7 +100,7 @@ class ConformerEncoderLayer(nn.Module): ...@@ -100,7 +100,7 @@ class ConformerEncoderLayer(nn.Module):
activation function for glu used in the multihead attention, activation function for glu used in the multihead attention,
default "swish". default "swish".
activation_checkpointing: str, optional activation_checkpointing: str, optional
a dictionarry of {"module","interval","offload"}, where a dictionary of {"module","interval","offload"}, where
"module": str "module": str
accept ["transformer", "attention"] to select accept ["transformer", "attention"] to select
which module should do activation checkpointing. which module should do activation checkpointing.
......
...@@ -846,7 +846,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration( ...@@ -846,7 +846,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
return [] return []
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary
...@@ -873,7 +873,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration( ...@@ -873,7 +873,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
if multimodal_embeddings is not None \ if multimodal_embeddings is not None \
and len(multimodal_embeddings) != 0: and len(multimodal_embeddings) != 0:
# TODO (ywang96): support overlapping modalitiy embeddings so that # TODO (ywang96): support overlapping modality embeddings so that
# `use_audio_in_video` will work on V1. # `use_audio_in_video` will work on V1.
inputs_embeds = merge_multimodal_embeddings( inputs_embeds = merge_multimodal_embeddings(
input_ids, inputs_embeds, multimodal_embeddings, [ input_ids, inputs_embeds, multimodal_embeddings, [
......
...@@ -463,7 +463,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]): ...@@ -463,7 +463,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
self.dcp_world_size = 1 self.dcp_world_size = 1
self.dcp_rank = 0 self.dcp_rank = 0
# Dont try to access the runner on AMD # Don't try to access the runner on AMD
if self.aot_schedule: if self.aot_schedule:
self.page_size = self.kv_cache_spec.block_size self.page_size = self.kv_cache_spec.block_size
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment