Unverified Commit 41ae4a1e authored by Didier Durand's avatar Didier Durand Committed by GitHub
Browse files

[Doc]: fix typos in various files (#24798)


Signed-off-by: default avatarDidier Durand <durand.didier@gmail.com>
parent 4dad72f0
......@@ -42,7 +42,7 @@ def main():
llm_args["model"] = "meta-llama/Llama-3.1-8B-Instruct"
# Set `enforce_eager=True` to avoid ahead-of-time compilation.
# In real workloads, `enforace_eager` should be `False`.
# In real workloads, `enforce_eager` should be `False`.
llm = LLM(**llm_args)
outputs = llm.generate(prompts, sampling_params)
print("-" * 50)
......
......@@ -182,7 +182,7 @@ class NaiveBlockAllocator(BlockAllocator):
# Increment refcount for each block.
assert block.block_id is not None
refcount = self._refcounter.incr(block.block_id)
assert refcount != 1, "can't fork free'd block"
assert refcount != 1, "can't fork freed block"
forked_block = self._block_pool.init_block(
prev_block=prev_block,
......
......@@ -58,7 +58,7 @@ class Evictor(ABC):
class BlockMetaData:
"""Data structure for storing key data describe cached block, so that
evitor could use to make its decision which one to choose for eviction
evictor could use to make its decision which one to choose for eviction
Here we use physical block id as the dict key, as there maybe several
blocks with the same content hash, but their physical id is unique.
......
......@@ -379,7 +379,7 @@ class LoggingStatLogger(StatLoggerBase):
if local_interval_elapsed(stats.now, self.last_local_log,
self.local_interval):
# Compute summary metrics for tracked stats (and log them
# to promethus if applicable).
# to prometheus if applicable).
prompt_throughput = get_throughput(self.num_prompt_tokens,
now=stats.now,
last_log=self.last_local_log)
......@@ -432,7 +432,7 @@ class LoggingStatLogger(StatLoggerBase):
class PrometheusStatLogger(StatLoggerBase):
"""PrometheusStatLogger is used LLMEngine to log to Promethus."""
"""PrometheusStatLogger is used LLMEngine to log to Prometheus."""
_metrics_cls = Metrics
_gauge_cls = prometheus_client.Gauge
......
......@@ -740,7 +740,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
"""
Handle special case for models where MLP layers are already
fused on disk. In this case, we have no shard id. This function
determmines the shard id by splitting these layers and then calls
determines the shard id by splitting these layers and then calls
the weight loader using the shard id.
An example of a model with these fused layers:
......@@ -914,7 +914,7 @@ class QKVParallelLinear(ColumnParallelLinear):
"""
Handle special case for models where QKV layers are already
fused on disk. In this case, we have no shard id. This function
determmines the shard id by splitting these layers and then calls
determines the shard id by splitting these layers and then calls
the weight loader using the shard id.
An example of a model with these fused layers:
......
......@@ -258,7 +258,7 @@ class VocabParallelEmbedding(CustomOp):
if params_dtype is None:
params_dtype = torch.get_default_dtype()
# Divide the weight matrix along the vocaburaly dimension.
# Divide the weight matrix along the vocabulary dimension.
self.num_added_embeddings = self.num_embeddings - self.org_vocab_size
self.num_embeddings_per_partition = divide(self.num_embeddings_padded,
self.tp_size)
......
......@@ -1446,7 +1446,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(nn.Module, SupportsMultiModal,
return None
# The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video).
# tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary
......
......@@ -586,10 +586,10 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal,
# ruff: noqa
# The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the
# text to account for this. However, the audio preprocessing and encoder do not gurarantee they will
# text to account for this. However, the audio preprocessing and encoder do not guarantee they will
# produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens
# depending on the length of the longest audio input in the batch. When we encounter this situation, we pad
# the audio feature out to 188 soft tokens with the emebedding of the last token in the embed_audio vocab.
# the audio feature out to 188 soft tokens with the embedding of the last token in the embed_audio vocab.
# TODO precompute and cache padding
audio_padding_toks = torch.tensor([[self.vocab_size - 1]],
dtype=torch.long,
......
......@@ -560,7 +560,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
return []
# The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image).
# tensor corresponding to a multimodal data item (image).
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary
......
......@@ -1154,7 +1154,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
return None
# The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video).
# tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment