Unverified Commit 1bd007f2 authored by co63oc's avatar co63oc Committed by GitHub
Browse files

fix some typos (#24071)


Signed-off-by: co63oc <co63oc@users.noreply.github.com>
parent 136d853e
...@@ -362,7 +362,7 @@ class ReLUSquaredActivation(CustomOp): ...@@ -362,7 +362,7 @@ class ReLUSquaredActivation(CustomOp):
return torch.square(F.relu(x)) return torch.square(F.relu(x))
def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
#TODO : implement cuda kenrels #TODO : implement cuda kernels
return self.forward_native(x) return self.forward_native(x)
......
...@@ -83,7 +83,7 @@ class HadamardTransform(torch.nn.Module): ...@@ -83,7 +83,7 @@ class HadamardTransform(torch.nn.Module):
# do not fold into weight in order to utilize FWHT # do not fold into weight in order to utilize FWHT
self.scales[part_id] = 1 / math.sqrt(data.size(0)) self.scales[part_id] = 1 / math.sqrt(data.size(0))
# FUTURE: avoid runtime tranpose by processing weights # FUTURE: avoid runtime transpose by processing weights
# prior to apply # prior to apply
def forward(self, value: Tensor, part_id: int = 0) -> Tensor: def forward(self, value: Tensor, part_id: int = 0) -> Tensor:
......
...@@ -310,7 +310,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ...@@ -310,7 +310,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
w13_bias = layer.w13_bias.data.to(torch.float32) w13_bias = layer.w13_bias.data.to(torch.float32)
w2_bias = layer.w2_bias.data.to(torch.float32) w2_bias = layer.w2_bias.data.to(torch.float32)
# Swap w1 and w3 as the defenition of # Swap w1 and w3 as the definition of
# swiglu is different in the trtllm-gen # swiglu is different in the trtllm-gen
def swap_every_two_rows(x, axis=-1): def swap_every_two_rows(x, axis=-1):
shape = x.shape shape = x.shape
......
...@@ -179,7 +179,7 @@ class Gemma3nMultiModalProcessor(BaseMultiModalProcessor[Gemma3nProcessingInfo] ...@@ -179,7 +179,7 @@ class Gemma3nMultiModalProcessor(BaseMultiModalProcessor[Gemma3nProcessingInfo]
) -> BatchFeature: ) -> BatchFeature:
# HF Transformers audio processor no longer accepts `audios` key. # HF Transformers audio processor no longer accepts `audios` key.
# We pop `audios` and replace it with `audio` key to surpress # We pop `audios` and replace it with `audio` key to suppress
# the warning. # the warning.
if 'audios' in mm_data: if 'audios' in mm_data:
mm_data['audio'] = mm_data.pop('audios') mm_data['audio'] = mm_data.pop('audios')
......
...@@ -492,7 +492,7 @@ class InternS1ForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -492,7 +492,7 @@ class InternS1ForConditionalGeneration(nn.Module, SupportsMultiModal,
@classmethod @classmethod
def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
# transformers InternVLProcessor uses <IMG_CONTEXT> as the seperator # transformers InternVLProcessor uses <IMG_CONTEXT> as the separator
# refer to https://github.com/huggingface/transformers/blob/f90de364c2484c7c325bbe05befdcf487bd75b63/src/transformers/models/internvl/processing_internvl.py#L116 # refer to https://github.com/huggingface/transformers/blob/f90de364c2484c7c325bbe05befdcf487bd75b63/src/transformers/models/internvl/processing_internvl.py#L116
if modality.startswith("image"): if modality.startswith("image"):
return '<IMG_CONTEXT>' return '<IMG_CONTEXT>'
......
...@@ -3533,7 +3533,7 @@ def nvmlDeviceGetMPSComputeRunningProcesses_v3(handle): ...@@ -3533,7 +3533,7 @@ def nvmlDeviceGetMPSComputeRunningProcesses_v3(handle):
return [] return []
elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): elif (ret == NVML_ERROR_INSUFFICIENT_SIZE):
# typical case # typical case
# oversize the array incase more processes are created # oversize the array in case more processes are created
c_count.value = c_count.value * 2 + 5 c_count.value = c_count.value * 2 + 5
proc_array = c_nvmlProcessInfo_v3_t * c_count.value proc_array = c_nvmlProcessInfo_v3_t * c_count.value
c_procs = proc_array() c_procs = proc_array()
......
...@@ -167,7 +167,7 @@ class FlashAttentionMetadataBuilder( ...@@ -167,7 +167,7 @@ class FlashAttentionMetadataBuilder(
# work for mixed prefill-decode and uniform-decode. But for non-spec decodes # work for mixed prefill-decode and uniform-decode. But for non-spec decodes
# the graphs would not work for mixed prefill-decode; sorta the inverse # the graphs would not work for mixed prefill-decode; sorta the inverse
# of UNIFORM_SINGLE_TOKEN_DECODE. # of UNIFORM_SINGLE_TOKEN_DECODE.
# Theres probably a better way to describe this using `AttentionCGSupport` # There's probably a better way to describe this using `AttentionCGSupport`
# but for now just set it to `UNIFORM_BATCH` to get us to drop down # but for now just set it to `UNIFORM_BATCH` to get us to drop down
# to FULL_AND_PIECEWISE. # to FULL_AND_PIECEWISE.
# TODO(luka, lucas): audit FA2 as part of: # TODO(luka, lucas): audit FA2 as part of:
......
...@@ -291,7 +291,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]): ...@@ -291,7 +291,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
paged_kv_indices_buffer=paged_kv_indices, paged_kv_indices_buffer=paged_kv_indices,
paged_kv_last_page_len_buffer=paged_kv_last_page_len, paged_kv_last_page_len_buffer=paged_kv_last_page_len,
# Tensor cores are enabled by default because the perf would be # Tensor cores are enabled by default because the perf would be
# atleast as good as cuda cores for all attention ops in latest # at least as good as cuda cores for all attention ops in latest
# gpus. # gpus.
use_tensor_cores=True, use_tensor_cores=True,
) )
......
...@@ -217,7 +217,7 @@ class FreeKVCacheBlockQueue: ...@@ -217,7 +217,7 @@ class FreeKVCacheBlockQueue:
# Create a fake head and a tail block for the doubly linked list to # Create a fake head and a tail block for the doubly linked list to
# reduce branching in the code # reduce branching in the code
# #
# The implementation garenteed that the fake head and tail # The implementation guaranteed that the fake head and tail
# are NEVER popped, so we could safely assume each real block # are NEVER popped, so we could safely assume each real block
# in the queue has prev and next blocks. # in the queue has prev and next blocks.
self.fake_free_list_head = KVCacheBlock(block_id=-1) self.fake_free_list_head = KVCacheBlock(block_id=-1)
......
...@@ -584,7 +584,7 @@ class InputBatch: ...@@ -584,7 +584,7 @@ class InputBatch:
if self.is_pooling_model: if self.is_pooling_model:
last_req_index -= 1 last_req_index -= 1
# Samping state not used by pooling models. # Sampling state not used by pooling models.
continue continue
# Autoregressive models require detailed tracking of condense # Autoregressive models require detailed tracking of condense
......
...@@ -2776,7 +2776,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): ...@@ -2776,7 +2776,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
self.attn_groups.append( self.attn_groups.append(
create_attn_groups(attn_backends, kv_cache_spec)) create_attn_groups(attn_backends, kv_cache_spec))
# Calculate reorder batch threshold (if neeeded) # Calculate reorder batch threshold (if needed)
self.calculate_reorder_batch_threshold() self.calculate_reorder_batch_threshold()
def initialize_cudagraph_capture(self) -> None: def initialize_cudagraph_capture(self) -> None:
......
...@@ -82,7 +82,7 @@ class KVConnectorModelRunnerMixin: ...@@ -82,7 +82,7 @@ class KVConnectorModelRunnerMixin:
scheduler_output) if has_kv_transfer_group() else nullcontext() scheduler_output) if has_kv_transfer_group() else nullcontext()
# This context manager must be used within an active forward context. # This context manager must be used within an active forward context.
# It encapsulates the entire KV conector lifecycle within execute_model # It encapsulates the entire KV connector lifecycle within execute_model
@staticmethod @staticmethod
@contextmanager @contextmanager
def _get_kv_connector_output( def _get_kv_connector_output(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment