Commit 1bd007f2 (unverified), authored by co63oc, committed by GitHub
Browse files

fix some typos (#24071)


Signed-off-by: co63oc <co63oc@users.noreply.github.com>
parent 136d853e
......@@ -362,7 +362,7 @@ class ReLUSquaredActivation(CustomOp):
return torch.square(F.relu(x))
def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
#TODO : implement cuda kenrels
#TODO : implement cuda kernels
return self.forward_native(x)
......
......@@ -83,7 +83,7 @@ class HadamardTransform(torch.nn.Module):
# do not fold into weight in order to utilize FWHT
self.scales[part_id] = 1 / math.sqrt(data.size(0))
# FUTURE: avoid runtime tranpose by processing weights
# FUTURE: avoid runtime transpose by processing weights
# prior to apply
def forward(self, value: Tensor, part_id: int = 0) -> Tensor:
......
......@@ -310,7 +310,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
w13_bias = layer.w13_bias.data.to(torch.float32)
w2_bias = layer.w2_bias.data.to(torch.float32)
# Swap w1 and w3 as the defenition of
# Swap w1 and w3 as the definition of
# swiglu is different in the trtllm-gen
def swap_every_two_rows(x, axis=-1):
shape = x.shape
......
......@@ -179,7 +179,7 @@ class Gemma3nMultiModalProcessor(BaseMultiModalProcessor[Gemma3nProcessingInfo]
) -> BatchFeature:
# HF Transformers audio processor no longer accepts `audios` key.
# We pop `audios` and replace it with `audio` key to surpress
# We pop `audios` and replace it with `audio` key to suppress
# the warning.
if 'audios' in mm_data:
mm_data['audio'] = mm_data.pop('audios')
......
......@@ -492,7 +492,7 @@ class InternS1ForConditionalGeneration(nn.Module, SupportsMultiModal,
@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
# transformers InternVLProcessor uses <IMG_CONTEXT> as the seperator
# transformers InternVLProcessor uses <IMG_CONTEXT> as the separator
# refer to https://github.com/huggingface/transformers/blob/f90de364c2484c7c325bbe05befdcf487bd75b63/src/transformers/models/internvl/processing_internvl.py#L116
if modality.startswith("image"):
return '<IMG_CONTEXT>'
......
......@@ -3533,7 +3533,7 @@ def nvmlDeviceGetMPSComputeRunningProcesses_v3(handle):
return []
elif (ret == NVML_ERROR_INSUFFICIENT_SIZE):
# typical case
# oversize the array incase more processes are created
# oversize the array in case more processes are created
c_count.value = c_count.value * 2 + 5
proc_array = c_nvmlProcessInfo_v3_t * c_count.value
c_procs = proc_array()
......
......@@ -167,7 +167,7 @@ class FlashAttentionMetadataBuilder(
# work for mixed prefill-decode and uniform-decode. But for non-spec decodes
# the graphs would not work for mixed prefill-decode; sorta the inverse
# of UNIFORM_SINGLE_TOKEN_DECODE.
# Theres probably a better way to describe this using `AttentionCGSupport`
# There's probably a better way to describe this using `AttentionCGSupport`
# but for now just set it to `UNIFORM_BATCH` to get us to drop down
# to FULL_AND_PIECEWISE.
# TODO(luka, lucas): audit FA2 as part of:
......
......@@ -291,7 +291,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
paged_kv_indices_buffer=paged_kv_indices,
paged_kv_last_page_len_buffer=paged_kv_last_page_len,
# Tensor cores are enabled by default because the perf would be
# atleast as good as cuda cores for all attention ops in latest
# at least as good as cuda cores for all attention ops in latest
# gpus.
use_tensor_cores=True,
)
......
......@@ -217,7 +217,7 @@ class FreeKVCacheBlockQueue:
# Create a fake head and a tail block for the doubly linked list to
# reduce branching in the code
#
# The implementation garenteed that the fake head and tail
# The implementation guaranteed that the fake head and tail
# are NEVER popped, so we can safely assume each real block
# in the queue has prev and next blocks.
self.fake_free_list_head = KVCacheBlock(block_id=-1)
......
......@@ -584,7 +584,7 @@ class InputBatch:
if self.is_pooling_model:
last_req_index -= 1
# Samping state not used by pooling models.
# Sampling state not used by pooling models.
continue
# Autoregressive models require detailed tracking of condense
......
......@@ -2776,7 +2776,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
self.attn_groups.append(
create_attn_groups(attn_backends, kv_cache_spec))
# Calculate reorder batch threshold (if neeeded)
# Calculate reorder batch threshold (if needed)
self.calculate_reorder_batch_threshold()
def initialize_cudagraph_capture(self) -> None:
......
......@@ -82,7 +82,7 @@ class KVConnectorModelRunnerMixin:
scheduler_output) if has_kv_transfer_group() else nullcontext()
# This context manager must be used within an active forward context.
# It encapsulates the entire KV conector lifecycle within execute_model
# It encapsulates the entire KV connector lifecycle within execute_model
@staticmethod
@contextmanager
def _get_kv_connector_output(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment