Unverified Commit 4979eb79, authored by Didier Durand, committed by GitHub
Browse files

[Doc]: fix typos in various files (#24821)


Signed-off-by: Didier Durand <durand.didier@gmail.com>
parent a8c0f599
...@@ -8,7 +8,7 @@ This benchmark aims to: ...@@ -8,7 +8,7 @@ This benchmark aims to:
Latest results: [results link](https://blog.vllm.ai/2024/09/05/perf-update.html), scroll to the end. Latest results: [results link](https://blog.vllm.ai/2024/09/05/perf-update.html), scroll to the end.
Latest reproduction guilde: [github issue link](https://github.com/vllm-project/vllm/issues/8176) Latest reproduction guide: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
## Setup ## Setup
......
...@@ -190,7 +190,7 @@ class MoeWNA16Method(FusedMoEMethodBase): ...@@ -190,7 +190,7 @@ class MoeWNA16Method(FusedMoEMethodBase):
group_size = self.quant_config.group_size group_size = self.quant_config.group_size
group_size_div_factor = 1 group_size_div_factor = 1
# make intermediate_size and hidden_size diviable by group_size # make intermediate_size and hidden_size divisible by group_size
# we reduce the group size to ensure that # we reduce the group size to ensure that
# and we would repeat the loaded_weight later # and we would repeat the loaded_weight later
while intermediate_size_per_partition % group_size or \ while intermediate_size_per_partition % group_size or \
......
...@@ -19,7 +19,7 @@ class MarlinWorkspace: ...@@ -19,7 +19,7 @@ class MarlinWorkspace:
def __init__(self, out_features, min_thread_n, max_parallel): def __init__(self, out_features, min_thread_n, max_parallel):
assert (out_features % min_thread_n == 0), ( assert (out_features % min_thread_n == 0), (
"out_features = {} is undivisible by min_thread_n = {}".format( "out_features = {} is indivisible by min_thread_n = {}".format(
out_features, min_thread_n)) out_features, min_thread_n))
max_workspace_size = ((out_features // min_thread_n) * max_parallel) max_workspace_size = ((out_features // min_thread_n) * max_parallel)
......
...@@ -649,7 +649,7 @@ def _sample_with_torch( ...@@ -649,7 +649,7 @@ def _sample_with_torch(
else: else:
sampled_token_ids_tensor = None sampled_token_ids_tensor = None
# Counterintiutively, having two loops here is actually faster. # Counterintuitively, having two loops here is actually faster.
# The first loop can run without waiting on GPU<->CPU sync. # The first loop can run without waiting on GPU<->CPU sync.
for sampling_type in SamplingType: for sampling_type in SamplingType:
sample_indices = categorized_sample_indices[sampling_type] sample_indices = categorized_sample_indices[sampling_type]
......
...@@ -1524,7 +1524,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -1524,7 +1524,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
return None return None
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary
......
...@@ -738,7 +738,7 @@ class InternS1ForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -738,7 +738,7 @@ class InternS1ForConditionalGeneration(nn.Module, SupportsMultiModal,
return [] return []
# The result multimodal_embeddings is tuple of tensors, with each # The result multimodal_embeddings is tuple of tensors, with each
# tensor correspoending to a multimodal data item (image or video). # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings: tuple[torch.Tensor, ...] = () multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# NOTE: It is important to iterate over the keys in this dictionary # NOTE: It is important to iterate over the keys in this dictionary
......
...@@ -662,7 +662,7 @@ def pad_and_concat_to_dim3( ...@@ -662,7 +662,7 @@ def pad_and_concat_to_dim3(
max_len = max(f.shape[-1] for f in features) max_len = max(f.shape[-1] for f in features)
# Ensure all features have dim=3 # Ensure all features have dim=3
features = [f.view(-1, *f.shape[-2:]) for f in features] features = [f.view(-1, *f.shape[-2:]) for f in features]
# Pad and oncatenate: # Pad and concatenate:
# [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)] # [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)]
features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features] features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
return torch.cat(features) return torch.cat(features)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment