"vscode:/vscode.git/clone" did not exist on "9e0ef888f0f1d39802dd0039064bb88a6918b320"
Unverified commit 9701352e, authored by Didier Durand, committed by GitHub
Browse files

[Doc]: fix typos in Python comments (#24001)


Signed-off-by: Didier Durand <durand.didier@gmail.com>
parent 749be00a
......@@ -43,7 +43,7 @@ cudagraph_capturing_enabled: bool = True
def validate_cudagraph_capturing_enabled():
# used to monitor whether an cudagraph capturing is legal at runtime.
# used to monitor whether a cudagraph capturing is legal at runtime.
# should be called before any cudagraph capturing.
# if an illegal cudagraph capturing happens, raise an error.
global cudagraph_capturing_enabled
......
......@@ -76,7 +76,7 @@ class LRUEvictor(Evictor):
that's recorded in the Block. If there are multiple blocks with
the same last_accessed time, then the one with the largest num_hashed_tokens
will be evicted. If two blocks each have the lowest last_accessed time and
highest num_hashed_tokens value, then one will be chose arbitrarily
highest num_hashed_tokens value, then one will be chosen arbitrarily
"""
# CLEANUP_THRESHOLD determines the maximum allowable size of the priority
......
......@@ -1239,7 +1239,7 @@ class LLMEngine:
# Stop the execute model loop in parallel workers until there are
# more requests to process. This avoids waiting indefinitely in
# torch.distributed ops which may otherwise timeout, and unblocks
# torch.distributed ops which may otherwise time out, and unblocks
# the RPC thread in the workers so that they can process any other
# queued control plane messages, such as add/remove lora adapters.
logger.debug("Stopping remote worker execution loop.")
......
......@@ -329,7 +329,7 @@ class LLM:
Args:
prompts: The prompts to the LLM. You may pass a sequence of prompts
for batch inference. See [PromptType][vllm.inputs.PromptType]
for more details about the format of each prompts.
for more details about the format of each prompt.
sampling_params: The sampling parameters for text generation. If
None, we use the default sampling parameters.
When it is a single value, it is applied to every prompt.
......@@ -853,7 +853,7 @@ class LLM:
Args:
prompts: The prompts to the LLM. You may pass a sequence of prompts
for batch inference. See [PromptType][vllm.inputs.PromptType]
for more details about the format of each prompts.
for more details about the format of each prompt.
pooling_params: The pooling parameters for pooling. If None, we
use the default pooling parameters.
use_tqdm: If `True`, shows a tqdm progress bar.
......@@ -946,7 +946,7 @@ class LLM:
Args:
prompts: The prompts to the LLM. You may pass a sequence of prompts
for batch inference. See [PromptType][vllm.inputs.PromptType]
for more details about the format of each prompts.
for more details about the format of each prompt.
pooling_params: The pooling parameters for pooling. If None, we
use the default pooling parameters.
use_tqdm: If `True`, shows a tqdm progress bar.
......@@ -994,7 +994,7 @@ class LLM:
Args:
prompts: The prompts to the LLM. You may pass a sequence of prompts
for batch inference. See [PromptType][vllm.inputs.PromptType]
for more details about the format of each prompts.
for more details about the format of each prompt.
use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
......@@ -1038,7 +1038,7 @@ class LLM:
Args:
prompts: The prompts to the LLM. You may pass a sequence of prompts
for batch inference. See [PromptType][vllm.inputs.PromptType]
for more details about the format of each prompts.
for more details about the format of each prompt.
use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
......
......@@ -101,7 +101,7 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase):
result_handler.start()
self.worker_monitor.start()
# Set up signal handlers to shutdown the executor cleanly
# Set up signal handlers to shut down the executor cleanly
# sometimes gc does not work well
self.driver_worker = WorkerWrapperBase(self.vllm_config, 0)
......
......@@ -605,7 +605,7 @@ class ColumnParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
"""ColumnParallelLinear layer that is composed of 2 sublayers (slices)
packed together (eg. gate_proj + up_proj -> gate_up_proj).
packed together (e.g. gate_proj + up_proj -> gate_up_proj).
This means we have 2 LoRAs, each applied to one half of the layer.
......
......@@ -537,7 +537,7 @@ class Platform:
def get_global_graph_pool(self) -> Any:
"""
Return the global graph pool for the this platform.
Return the global graph pool for this platform.
"""
cls = self.__class__
if cls._global_graph_pool is None:
......
......@@ -30,7 +30,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
Key Features:
- For non-stream output , Recognizes and extracts reasoning ("think")
and answer ("answer") sections from text using regular expressions.
- For stream process, it require a token id sequences to change the
- For stream process, it requires a token id sequences to change the
reasoning state and other state so it maintains internal state to
manage parsing across multiple token.
......
......@@ -2734,7 +2734,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
layer_names)
attn_backends = {}
attn_backend_layers = defaultdict(list)
# Dedupe based on full class name; this is a bit safer than using
# Dedupe based on full class name; this is a bit safer than
# using the class itself as the key because when we create dynamic
# attention backend subclasses (e.g. ChunkedLocalAttention) unless
# they are cached correctly, there will be different objects per
......
......@@ -224,7 +224,7 @@ class Worker(WorkerBase):
memory can be used for KV cache without OOMs.
The engine will first conduct a profiling of the existing memory usage.
Then, it calculate the free memory that can be used for KV cache in
Then, it calculates the free memory that can be used for KV cache in
bytes.
Tip:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment