Unverified commit a9944aab, authored by omahs and committed by GitHub
Browse files

fix: typos (#18151)


Signed-off-by: omahs <73983677+omahs@users.noreply.github.com>
parent a8f5aec2
......@@ -172,7 +172,7 @@ __device__ void paged_attention_kernel(
// Load the query to registers.
// Each thread in a thread group has a different part of the query.
// For example, if the the thread group size is 4, then the first thread in
// For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the query, and the second thread
// has 1, 5, 9, ... th vectors of the query, and so on. NOTE(woosuk): Because
// q is split from a qkv tensor, it may not be contiguous.
......@@ -259,7 +259,7 @@ __device__ void paged_attention_kernel(
// Load a key to registers.
// Each thread in a thread group has a different part of the key.
// For example, if the the thread group size is 4, then the first thread in
// For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the key, and the second thread
// has 1, 5, 9, ... th vectors of the key, and so on.
for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {
......
......@@ -68,7 +68,7 @@ def get_current_weather(city: str, state: str, unit: 'str'):
"partly cloudly, with highs in the 90's.")
tool_funtions = {"get_current_weather": get_current_weather}
tool_functions = {"get_current_weather": get_current_weather}
tools = [{
"type": "function",
......@@ -122,7 +122,7 @@ messages.append({
# above defined function
tool_calls = json.loads(output)
tool_answers = [
tool_funtions[call['name']](**call['arguments']) for call in tool_calls
tool_functions[call['name']](**call['arguments']) for call in tool_calls
]
# append the answer as a tool message and let the LLM give you an answer
......
......@@ -30,7 +30,7 @@ def test_load_checkpoints_from_huggingface(lora_fixture_name, request):
lora_path = get_adapter_absolute_path(lora_name)
# lora loading should work for either absolute path and hugggingface id.
# lora loading should work for either an absolute path or a huggingface id.
peft_helper = PEFTHelper.from_local_dir(lora_path, 4096)
lora_model = LoRAModel.from_local_checkpoint(
lora_path,
......
......@@ -20,11 +20,11 @@ def test_hf_transfer_auto_activation():
try:
# enable hf hub transfer if available
import hf_transfer # type: ignore # noqa
HF_TRANFER_ACTIVE = True
HF_TRANSFER_ACTIVE = True
except ImportError:
HF_TRANFER_ACTIVE = False
HF_TRANSFER_ACTIVE = False
assert (huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER ==
HF_TRANFER_ACTIVE)
HF_TRANSFER_ACTIVE)
def test_download_weights_from_hf():
......
......@@ -297,7 +297,7 @@ class ModelConfig:
- 1K -> 1024\n
- 25.6k -> 25,600"""
spec_target_max_model_len: Optional[int] = None
"""Specify the the maximum length for spec decoding draft models."""
"""Specify the maximum length for spec decoding draft models."""
quantization: Optional[QuantizationMethods] = None
"""Method used to quantize the weights. If `None`, we first check the
`quantization_config` attribute in the model config file. If that is
......
......@@ -153,7 +153,7 @@ def _lora_expand(
lora_token_start_loc (torch.Tensor): A cumulative sum of
num_tokens_per_lora. lora_token_start_loc[0] is always 0 so that
lora_token_start_loc[i], along with num_tokens_per_lora[i]
identifies the the region in token_indices_sorted_by_lora_ids that
identifies the region in token_indices_sorted_by_lora_ids that
LoRA lora_ids[i] should process.
lora_ids (torch.Tensor): LoRA ids to process.
no_lora_flag_cpu (torch.Tensor): A CPU tensor of size 1, that indicates
......
......@@ -142,7 +142,7 @@ def mamba_v2_sharded_weight_loader(
) -> LoaderFunction:
"""Create a weight loader for mamba v2. This ensures that the projections
are correctly sharded so that they can be split into x, B, C. It also
ensures the the all the groups corresponding to a head shard is placed
ensures that all the groups corresponding to a head shard are placed
together with it.
"""
......
......@@ -21,7 +21,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Inference-only IBM Granite speeech model."""
"""Inference-only IBM Granite speech model."""
import math
from collections.abc import Iterable, Mapping
from typing import Optional, TypedDict, Union
......@@ -626,7 +626,7 @@ class GraniteSpeechForConditionalGeneration(
audio_embed_sizes: torch.Tensor,
) -> torch.Tensor:
"""Calculate the input features mask, which will generally be used
to mask the the padded features for all entries in the batch except
to mask the padded features for all entries in the batch except
for those with the most audio features.
Args:
......
......@@ -91,9 +91,9 @@ class ConformerEncoderLayer(nn.Module):
if set to True, use GLULinear module,
otherwise, use GLUPointWiseConv module.
default to False.
attention_innner_dim: int, optional
attention_inner_dim: int, optional
if equal to -1, attention dim for linears k/q/v is
equal to d_model. otherwise attention_innner_dim is used.
equal to d_model. otherwise attention_inner_dim is used.
default -1.
attention_glu_type: str, optional
activation function for glu used in the multihead attention,
......@@ -148,7 +148,7 @@ class ConformerEncoderLayer(nn.Module):
conv_glu_type="sigmoid",
bias_in_glu=True,
linear_glu_in_convm=False,
attention_innner_dim=-1,
attention_inner_dim=-1,
attention_glu_type="swish",
activation_checkpointing="",
export=False,
......@@ -169,7 +169,7 @@ class ConformerEncoderLayer(nn.Module):
n_head,
d_model,
dropout_rate,
attention_innner_dim,
attention_inner_dim,
attention_glu_type,
bias_in_glu,
use_pt_scaled_dot_product_attention=
......
......@@ -72,7 +72,7 @@ class Request:
assert len(self.mm_inputs) == len(self.mm_hashes)
# Read-only views
# Prevent directly appending to the these lists since
# Prevent directly appending to these lists since
# they should also be updated simultaneously.
self.output_token_ids = ConstantList(self._output_token_ids)
self.all_token_ids = ConstantList(self._all_token_ids)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment