Unverified commit a9944aab, authored by omahs, committed by GitHub
Browse files

fix: typos (#18151)


Signed-off-by: omahs <73983677+omahs@users.noreply.github.com>
parent a8f5aec2
...@@ -172,7 +172,7 @@ __device__ void paged_attention_kernel( ...@@ -172,7 +172,7 @@ __device__ void paged_attention_kernel(
// Load the query to registers. // Load the query to registers.
// Each thread in a thread group has a different part of the query. // Each thread in a thread group has a different part of the query.
// For example, if the the thread group size is 4, then the first thread in // For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the query, and the second thread // the group has 0, 4, 8, ... th vectors of the query, and the second thread
// has 1, 5, 9, ... th vectors of the query, and so on. NOTE(woosuk): Because // has 1, 5, 9, ... th vectors of the query, and so on. NOTE(woosuk): Because
// q is split from a qkv tensor, it may not be contiguous. // q is split from a qkv tensor, it may not be contiguous.
...@@ -259,7 +259,7 @@ __device__ void paged_attention_kernel( ...@@ -259,7 +259,7 @@ __device__ void paged_attention_kernel(
// Load a key to registers. // Load a key to registers.
// Each thread in a thread group has a different part of the key. // Each thread in a thread group has a different part of the key.
// For example, if the the thread group size is 4, then the first thread in // For example, if the thread group size is 4, then the first thread in
// the group has 0, 4, 8, ... th vectors of the key, and the second thread // the group has 0, 4, 8, ... th vectors of the key, and the second thread
// has 1, 5, 9, ... th vectors of the key, and so on. // has 1, 5, 9, ... th vectors of the key, and so on.
for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) { for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {
......
...@@ -68,7 +68,7 @@ def get_current_weather(city: str, state: str, unit: 'str'): ...@@ -68,7 +68,7 @@ def get_current_weather(city: str, state: str, unit: 'str'):
"partly cloudly, with highs in the 90's.") "partly cloudly, with highs in the 90's.")
tool_funtions = {"get_current_weather": get_current_weather} tool_functions = {"get_current_weather": get_current_weather}
tools = [{ tools = [{
"type": "function", "type": "function",
...@@ -122,7 +122,7 @@ messages.append({ ...@@ -122,7 +122,7 @@ messages.append({
# above defined function # above defined function
tool_calls = json.loads(output) tool_calls = json.loads(output)
tool_answers = [ tool_answers = [
tool_funtions[call['name']](**call['arguments']) for call in tool_calls tool_functions[call['name']](**call['arguments']) for call in tool_calls
] ]
# append the answer as a tool message and let the LLM give you an answer # append the answer as a tool message and let the LLM give you an answer
......
...@@ -30,7 +30,7 @@ def test_load_checkpoints_from_huggingface(lora_fixture_name, request): ...@@ -30,7 +30,7 @@ def test_load_checkpoints_from_huggingface(lora_fixture_name, request):
lora_path = get_adapter_absolute_path(lora_name) lora_path = get_adapter_absolute_path(lora_name)
# lora loading should work for either absolute path and hugggingface id. # lora loading should work for either absolute path and huggingface id.
peft_helper = PEFTHelper.from_local_dir(lora_path, 4096) peft_helper = PEFTHelper.from_local_dir(lora_path, 4096)
lora_model = LoRAModel.from_local_checkpoint( lora_model = LoRAModel.from_local_checkpoint(
lora_path, lora_path,
......
...@@ -20,11 +20,11 @@ def test_hf_transfer_auto_activation(): ...@@ -20,11 +20,11 @@ def test_hf_transfer_auto_activation():
try: try:
# enable hf hub transfer if available # enable hf hub transfer if available
import hf_transfer # type: ignore # noqa import hf_transfer # type: ignore # noqa
HF_TRANFER_ACTIVE = True HF_TRANSFER_ACTIVE = True
except ImportError: except ImportError:
HF_TRANFER_ACTIVE = False HF_TRANSFER_ACTIVE = False
assert (huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER == assert (huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER ==
HF_TRANFER_ACTIVE) HF_TRANSFER_ACTIVE)
def test_download_weights_from_hf(): def test_download_weights_from_hf():
......
...@@ -297,7 +297,7 @@ class ModelConfig: ...@@ -297,7 +297,7 @@ class ModelConfig:
- 1K -> 1024\n - 1K -> 1024\n
- 25.6k -> 25,600""" - 25.6k -> 25,600"""
spec_target_max_model_len: Optional[int] = None spec_target_max_model_len: Optional[int] = None
"""Specify the the maximum length for spec decoding draft models.""" """Specify the maximum length for spec decoding draft models."""
quantization: Optional[QuantizationMethods] = None quantization: Optional[QuantizationMethods] = None
"""Method used to quantize the weights. If `None`, we first check the """Method used to quantize the weights. If `None`, we first check the
`quantization_config` attribute in the model config file. If that is `quantization_config` attribute in the model config file. If that is
......
...@@ -153,7 +153,7 @@ def _lora_expand( ...@@ -153,7 +153,7 @@ def _lora_expand(
lora_token_start_loc (torch.Tensor): A cumulative sum of lora_token_start_loc (torch.Tensor): A cumulative sum of
num_tokens_per_lora. lora_token_start_loc[0] is always 0 so that num_tokens_per_lora. lora_token_start_loc[0] is always 0 so that
lora_token_start_loc[i], along with num_tokens_per_lora[i] lora_token_start_loc[i], along with num_tokens_per_lora[i]
identifies the the region in token_indices_sorted_by_lora_ids that identifies the region in token_indices_sorted_by_lora_ids that
LoRA lora_ids[i] should process. LoRA lora_ids[i] should process.
lora_ids (torch.Tensor): LoRA ids to process. lora_ids (torch.Tensor): LoRA ids to process.
no_lora_flag_cpu (torch.Tensor): A CPU tensor of size 1, that indicates no_lora_flag_cpu (torch.Tensor): A CPU tensor of size 1, that indicates
......
...@@ -142,7 +142,7 @@ def mamba_v2_sharded_weight_loader( ...@@ -142,7 +142,7 @@ def mamba_v2_sharded_weight_loader(
) -> LoaderFunction: ) -> LoaderFunction:
"""Create a weight loader for mamba v2. This ensures that the projections """Create a weight loader for mamba v2. This ensures that the projections
are correctly sharded so that they can be split into x, B, C. It also are correctly sharded so that they can be split into x, B, C. It also
ensures the the all the groups corresponding to a head shard is placed ensures that all the groups corresponding to a head shard is placed
together with it. together with it.
""" """
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Inference-only IBM Granite speeech model.""" """Inference-only IBM Granite speech model."""
import math import math
from collections.abc import Iterable, Mapping from collections.abc import Iterable, Mapping
from typing import Optional, TypedDict, Union from typing import Optional, TypedDict, Union
...@@ -626,7 +626,7 @@ class GraniteSpeechForConditionalGeneration( ...@@ -626,7 +626,7 @@ class GraniteSpeechForConditionalGeneration(
audio_embed_sizes: torch.Tensor, audio_embed_sizes: torch.Tensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""Calculate the input features mask, which will generally be used """Calculate the input features mask, which will generally be used
to mask the the padded features for all entries in the batch except to mask the padded features for all entries in the batch except
for those with the most audio features. for those with the most audio features.
Args: Args:
......
...@@ -91,9 +91,9 @@ class ConformerEncoderLayer(nn.Module): ...@@ -91,9 +91,9 @@ class ConformerEncoderLayer(nn.Module):
if set to True, use GLULinear module, if set to True, use GLULinear module,
otherwise, used GLUPointWiseConv module. otherwise, used GLUPointWiseConv module.
default to False. default to False.
attention_innner_dim: int, optional attention_inner_dim: int, optional
if equal to -1, attention dim for linears k/q/v is if equal to -1, attention dim for linears k/q/v is
equal to d_model. otherwise attention_innner_dim is used. equal to d_model. otherwise attention_inner_dim is used.
default -1. default -1.
attention_glu_type: str, optional attention_glu_type: str, optional
activation function for glu used in the multihead attention, activation function for glu used in the multihead attention,
...@@ -148,7 +148,7 @@ class ConformerEncoderLayer(nn.Module): ...@@ -148,7 +148,7 @@ class ConformerEncoderLayer(nn.Module):
conv_glu_type="sigmoid", conv_glu_type="sigmoid",
bias_in_glu=True, bias_in_glu=True,
linear_glu_in_convm=False, linear_glu_in_convm=False,
attention_innner_dim=-1, attention_inner_dim=-1,
attention_glu_type="swish", attention_glu_type="swish",
activation_checkpointing="", activation_checkpointing="",
export=False, export=False,
...@@ -169,7 +169,7 @@ class ConformerEncoderLayer(nn.Module): ...@@ -169,7 +169,7 @@ class ConformerEncoderLayer(nn.Module):
n_head, n_head,
d_model, d_model,
dropout_rate, dropout_rate,
attention_innner_dim, attention_inner_dim,
attention_glu_type, attention_glu_type,
bias_in_glu, bias_in_glu,
use_pt_scaled_dot_product_attention= use_pt_scaled_dot_product_attention=
......
...@@ -72,7 +72,7 @@ class Request: ...@@ -72,7 +72,7 @@ class Request:
assert len(self.mm_inputs) == len(self.mm_hashes) assert len(self.mm_inputs) == len(self.mm_hashes)
# Read-only views # Read-only views
# Prevent directly appending to the these lists since # Prevent directly appending to these lists since
# they should also be updated simultaneously. # they should also be updated simultaneously.
self.output_token_ids = ConstantList(self._output_token_ids) self.output_token_ids = ConstantList(self._output_token_ids)
self.all_token_ids = ConstantList(self._all_token_ids) self.all_token_ids = ConstantList(self._all_token_ids)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment