Unverified Commit 2f1c19b2 authored by Ning Xie's avatar Ning Xie Committed by GitHub
Browse files

[CI] change spell checker from codespell to typos (#18711)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 42f52cc9
...@@ -118,7 +118,7 @@ def run_test( ...@@ -118,7 +118,7 @@ def run_test(
# default to enforce_eager=True if enforce_eager # default to enforce_eager=True if enforce_eager
# is left unspecified. However, the # is left unspecified. However, the
# VllmRunner test fixture (which wraps around the LLM class) defaults to # VllmRunner test fixture (which wraps around the LLM class) defaults to
# enforce_eager=False (a behavior which a number of already-exisitng # enforce_eager=False (a behavior which a number of already-existing
# decoder-only unit tests expect), so when testing an encoder/decoder # decoder-only unit tests expect), so when testing an encoder/decoder
# model we must explicitly specify enforce_eager=True in the VllmRunner # model we must explicitly specify enforce_eager=True in the VllmRunner
# constructor. # constructor.
......
...@@ -248,7 +248,7 @@ def test_temperature_zero_target_distribution(seed: int, device: str): ...@@ -248,7 +248,7 @@ def test_temperature_zero_target_distribution(seed: int, device: str):
size=(batch_size, 1), size=(batch_size, 1),
dtype=torch.int64) dtype=torch.int64)
# The target probaility distribution is a temperature zero distribution # The target probaility distribution is a temperature zero distribution
# with zero entroy. Since our draft token ids don't match the probability # with zero entropy. Since our draft token ids don't match the probability
# 1.0 tokens in the target distribution we will reject all of them and # 1.0 tokens in the target distribution we will reject all of them and
# fallback to the greedy sampling for selecting 1 token for each sequence. # fallback to the greedy sampling for selecting 1 token for each sequence.
# Verify the same. # Verify the same.
......
...@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed: ...@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed:
* Test greedy equality under various number of speculative tokens. * Test greedy equality under various number of speculative tokens.
With those tests, we can say at least, EAGLE would not break the With those tests, we can say at least, EAGLE would not break the
correctess for the target model outputs. correctness for the target model outputs.
""" """
import pytest import pytest
......
...@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed: ...@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed:
* Test greedy equality under various number of speculative tokens. * Test greedy equality under various number of speculative tokens.
With those tests, we can say at least, Medusa would not break the With those tests, we can say at least, Medusa would not break the
correctess for the target model outputs. correctness for the target model outputs.
""" """
import pytest import pytest
......
...@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed: ...@@ -18,7 +18,7 @@ However, we still need to verify below scenario could be passed:
* Test greedy equality under various number of speculative tokens. * Test greedy equality under various number of speculative tokens.
With those tests, we can say at least, mtp would not break the With those tests, we can say at least, mtp would not break the
correctess for the target model outputs. correctness for the target model outputs.
""" """
import pytest import pytest
......
...@@ -22,8 +22,8 @@ However, we still need to verify below scenario could be passed: ...@@ -22,8 +22,8 @@ However, we still need to verify below scenario could be passed:
* Test greedy equality under preemption * Test greedy equality under preemption
* Test greedy equality under various ngram sizes / speculative sizes * Test greedy equality under various ngram sizes / speculative sizes
With those tests, we can say at least, ngram spec would not break the correctess With those tests, we can say at least, ngram spec would not break the
for the target model outputs. correctness for the target model outputs.
""" """
import pytest import pytest
......
...@@ -30,7 +30,7 @@ model_config = { ...@@ -30,7 +30,7 @@ model_config = {
]) ])
@pytest.mark.parametrize("batch_size", [5]) @pytest.mark.parametrize("batch_size", [5])
@pytest.mark.parametrize("seed", [1]) @pytest.mark.parametrize("seed", [1])
def test_sliding_window_retrival(monkeypatch, model, batch_size, seed): def test_sliding_window_retrieval(monkeypatch, model, batch_size, seed):
""" """
The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then
asks for value of one of them (which is outside the sliding window). asks for value of one of them (which is outside the sliding window).
......
...@@ -7,7 +7,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import ( ...@@ -7,7 +7,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
from .utils import create_request, create_scheduler, create_vllm_config from .utils import create_request, create_scheduler, create_vllm_config
def test_basic_inferface(): def test_basic_interface():
"""Unit test for basic NixlConnector interface functionality.""" """Unit test for basic NixlConnector interface functionality."""
vllm_config = create_vllm_config() vllm_config = create_vllm_config()
...@@ -25,7 +25,7 @@ def test_basic_inferface(): ...@@ -25,7 +25,7 @@ def test_basic_inferface():
scheduler.add_request(request) scheduler.add_request(request)
# Remote Prefill, triggers NixlConnectorMetdata. # Remote Prefill, triggers NixlConnectorMetadata.
scheduler_output = scheduler.schedule() scheduler_output = scheduler.schedule()
kv_connector_metadata = scheduler_output.kv_connector_metadata kv_connector_metadata = scheduler_output.kv_connector_metadata
assert kv_connector_metadata is not None assert kv_connector_metadata is not None
......
...@@ -32,7 +32,7 @@ def test_prompt_logprobs_e2e(): ...@@ -32,7 +32,7 @@ def test_prompt_logprobs_e2e():
), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}" ), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}"
def test_promt_logprobs_e2e_server(): def test_prompt_logprobs_e2e_server():
with RemoteOpenAIServer(MODEL, SERVER_ARGS) as remote_server: with RemoteOpenAIServer(MODEL, SERVER_ARGS) as remote_server:
url = f"{remote_server.url_for('v1')}/completions" url = f"{remote_server.url_for('v1')}/completions"
......
...@@ -209,32 +209,32 @@ def test_multi_step_model_runner_input(): ...@@ -209,32 +209,32 @@ def test_multi_step_model_runner_input():
received_model_input = (StatefulModelInput.from_broadcasted_tensor_dict( received_model_input = (StatefulModelInput.from_broadcasted_tensor_dict(
tensor_dict, attn_backend=attn_backend)) tensor_dict, attn_backend=attn_backend))
receieved_frozen_input = received_model_input.frozen_model_input received_frozen_input = received_model_input.frozen_model_input
# Check that received copy has correct values. # Check that received copy has correct values.
assert isinstance(received_model_input, StatefulModelInput) assert isinstance(received_model_input, StatefulModelInput)
assert receieved_frozen_input.input_tokens is not None assert received_frozen_input.input_tokens is not None
assert (receieved_frozen_input.input_tokens == assert (received_frozen_input.input_tokens ==
frozen_model_input.input_tokens).all() frozen_model_input.input_tokens).all()
assert receieved_frozen_input.input_positions is not None assert received_frozen_input.input_positions is not None
assert (receieved_frozen_input.input_positions == assert (received_frozen_input.input_positions ==
frozen_model_input.input_positions).all() frozen_model_input.input_positions).all()
assert receieved_frozen_input.multi_modal_kwargs is None assert received_frozen_input.multi_modal_kwargs is None
assert (frozen_model_input.multi_modal_kwargs == assert (frozen_model_input.multi_modal_kwargs ==
frozen_model_input.multi_modal_kwargs) frozen_model_input.multi_modal_kwargs)
assert receieved_frozen_input.lora_requests is None assert received_frozen_input.lora_requests is None
assert (receieved_frozen_input.lora_requests == assert (received_frozen_input.lora_requests ==
frozen_model_input.lora_requests) frozen_model_input.lora_requests)
assert receieved_frozen_input.lora_mapping is None assert received_frozen_input.lora_mapping is None
assert ( assert (
receieved_frozen_input.lora_mapping == frozen_model_input.lora_mapping) received_frozen_input.lora_mapping == frozen_model_input.lora_mapping)
for field in dataclasses.fields(AttentionMetadata): for field in dataclasses.fields(AttentionMetadata):
assert getattr(receieved_frozen_input.attn_metadata, field.name, assert getattr(received_frozen_input.attn_metadata, field.name,
None) == getattr(attn_metadata, field.name, None) None) == getattr(attn_metadata, field.name, None)
# For sampling metadata, only selected_token_indices is copied. # For sampling metadata, only selected_token_indices is copied.
assert (receieved_frozen_input.sampling_metadata.selected_token_indices == assert (received_frozen_input.sampling_metadata.selected_token_indices ==
sampling_metadata.selected_token_indices) sampling_metadata.selected_token_indices)
assert receieved_frozen_input.sampling_metadata.seq_groups is None assert received_frozen_input.sampling_metadata.seq_groups is None
# check non frozen fields # check non frozen fields
assert received_model_input.is_last_step == model_input.is_last_step assert received_model_input.is_last_step == model_input.is_last_step
......
...@@ -116,7 +116,7 @@ def ReadTargets(log, show_all): ...@@ -116,7 +116,7 @@ def ReadTargets(log, show_all):
# If ninja.exe is rudely halted then the .ninja_log file may be # If ninja.exe is rudely halted then the .ninja_log file may be
# corrupt. Silently continue. # corrupt. Silently continue.
continue continue
start, end, _, name, cmdhash = parts # Ignore restat. start, end, _, name, cmdhash = parts # Ignore restart.
# Convert from integral milliseconds to float seconds. # Convert from integral milliseconds to float seconds.
start = int(start) / 1000.0 start = int(start) / 1000.0
end = int(end) / 1000.0 end = int(end) / 1000.0
......
[files]
# These paths may contain non-English words, so they are excluded from
# spell checking.
extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
"benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
"vllm/third_party/*"]
# File-walker switches, all enabled here.
# NOTE(review): per the typos configuration reference these make the walker
# skip hidden files and honour the various ignore files (.ignore, VCS,
# global and parent-directory ones) — confirm against the typos version in CI.
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[default]
# Baseline spell-checking policy; the [type.*] tables below refine it per
# file type.
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
# Identifiers matching any of these patterns are never reported.
# The entries are regular expressions, not globs:
#   - "NVML_.*" replaces "NVML_*", which only repeated the underscore.
#   - ".*(UE4M3|ue4m3).*" replaces ".*[UE4M3|ue4m3].*". Square brackets form
#     a character class, so the old pattern matched any identifier that
#     contained a single one of the characters U, E, 4, M, 3, |, u, e or m,
#     which exempted most identifiers from checking.
# NOTE(review): tightening these patterns may surface misspellings that were
# previously hidden. ".*fo.*", ".*ba.*" and ".*ot.*" remain very broad —
# consider narrowing them to the specific identifiers they were added for.
extend-ignore-identifiers-re = ["NVML_.*", ".*Unc.*", ".*_thw",
".*UE8M0.*", ".*(UE4M3|ue4m3).*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
extend-ignore-words-re = []
extend-ignore-re = []
[default.extend-identifiers]
# Every entry maps an identifier to itself. Per the typos configuration
# reference, key == value marks that identifier as valid, so these exact
# spellings are accepted in any file type.
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# The Hugging Face transformers repo uses these spellings, so they are
# accepted as-is here.
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"
[default.extend-words]
# Word-level allow list: each word maps to itself, which (per the typos
# configuration reference) marks it as correctly spelled.
iy = "iy"
tendencias = "tendencias"
# Intel CPU feature name
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"
# Per-file-type overrides. Each [type.<name>] table can refine the [default]
# policy for one kind of file, and its extend-identifiers / extend-words
# sub-tables add type-specific allow-list entries (key == value means
# "accept this spelling").
# NOTE(review): every extend-glob list below is empty, so no extra filename
# globs are added to any type here. These tables resemble the defaults that
# typos ships for its known file types — confirm whether they need to be
# spelled out explicitly.
[type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
[type.py.extend-words]
[type.cpp]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.cpp.extend-identifiers]
countr_one = "countr_one"
[type.cpp.extend-words]
[type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.rust.extend-identifiers]
flate2 = "flate2"
[type.rust.extend-words]
ser = "ser"
[type.lock]
extend-glob = []
# Overrides [default] check-file = true for this file type.
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.lock.extend-identifiers]
[type.lock.extend-words]
[type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.jl.extend-identifiers]
[type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.go.extend-identifiers]
flate = "flate"
[type.go.extend-words]
[type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.css.extend-identifiers]
nd = "nd"
[type.css.extend-words]
[type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.man.extend-identifiers]
Nd = "Nd"
[type.man.extend-words]
[type.cert]
extend-glob = []
# Overrides [default] check-file = true for this file type.
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.cert.extend-identifiers]
[type.cert.extend-words]
[type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.sh.extend-identifiers]
stap = "stap"
ot = "ot"
[type.sh.extend-words]
[type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.vimscript.extend-identifiers]
windo = "windo"
[type.vimscript.extend-words]
...@@ -1550,10 +1550,10 @@ def moe_wna16_gemm(input: torch.Tensor, output: torch.Tensor, ...@@ -1550,10 +1550,10 @@ def moe_wna16_gemm(input: torch.Tensor, output: torch.Tensor,
def topk_softmax(topk_weights: torch.Tensor, topk_ids: torch.Tensor, def topk_softmax(topk_weights: torch.Tensor, topk_ids: torch.Tensor,
token_expert_indicies: torch.Tensor, token_expert_indices: torch.Tensor,
gating_output: torch.Tensor) -> None: gating_output: torch.Tensor) -> None:
torch.ops._moe_C.topk_softmax(topk_weights, topk_ids, torch.ops._moe_C.topk_softmax(topk_weights, topk_ids, token_expert_indices,
token_expert_indicies, gating_output) gating_output)
def moe_wna16_marlin_gemm(input: torch.Tensor, output: Optional[torch.Tensor], def moe_wna16_marlin_gemm(input: torch.Tensor, output: Optional[torch.Tensor],
......
...@@ -373,7 +373,7 @@ class CommonAttentionState(AttentionState): ...@@ -373,7 +373,7 @@ class CommonAttentionState(AttentionState):
f"Expected attn_backend name to be either 'XFORMERS'," \ f"Expected attn_backend name to be either 'XFORMERS'," \
f"'ROCM_FLASH', or 'FLASH_ATTN', but " \ f"'ROCM_FLASH', or 'FLASH_ATTN', but " \
f"got '{self.runner.attn_backend.get_name()}'" f"got '{self.runner.attn_backend.get_name()}'"
self._add_additonal_input_buffers_for_enc_dec_model( self._add_additional_input_buffers_for_enc_dec_model(
attn_metadata=attn_metadata, input_buffers=input_buffers) attn_metadata=attn_metadata, input_buffers=input_buffers)
return input_buffers return input_buffers
...@@ -427,7 +427,7 @@ class CommonAttentionState(AttentionState): ...@@ -427,7 +427,7 @@ class CommonAttentionState(AttentionState):
attn_metadata.max_encoder_seq_len = self.runner.max_seq_len_to_capture attn_metadata.max_encoder_seq_len = self.runner.max_seq_len_to_capture
attn_metadata.num_encoder_tokens = 0 attn_metadata.num_encoder_tokens = 0
def _add_additonal_input_buffers_for_enc_dec_model( def _add_additional_input_buffers_for_enc_dec_model(
self, attn_metadata, input_buffers: Dict[str, Any]): self, attn_metadata, input_buffers: Dict[str, Any]):
""" """
Saves additional input buffers specific to the encoder-decoder model Saves additional input buffers specific to the encoder-decoder model
......
...@@ -40,7 +40,7 @@ class Internlm2ToolParser(ToolParser): ...@@ -40,7 +40,7 @@ class Internlm2ToolParser(ToolParser):
request.skip_special_tokens = False request.skip_special_tokens = False
return request return request
def get_argments(self, obj): def get_arguments(self, obj):
if "parameters" in obj: if "parameters" in obj:
return obj.get("parameters") return obj.get("parameters")
elif "arguments" in obj: elif "arguments" in obj:
...@@ -119,9 +119,9 @@ class Internlm2ToolParser(ToolParser): ...@@ -119,9 +119,9 @@ class Internlm2ToolParser(ToolParser):
# now we know we're on the same tool call and we're streaming # now we know we're on the same tool call and we're streaming
# arguments # arguments
else: else:
prev_arguments = self.get_argments( prev_arguments = self.get_arguments(
self.prev_tool_call_arr[self.current_tool_id]) self.prev_tool_call_arr[self.current_tool_id])
cur_arguments = self.get_argments(tool_call_arr) cur_arguments = self.get_arguments(tool_call_arr)
# not arguments generated # not arguments generated
if not cur_arguments and not prev_arguments: if not cur_arguments and not prev_arguments:
...@@ -170,7 +170,7 @@ class Internlm2ToolParser(ToolParser): ...@@ -170,7 +170,7 @@ class Internlm2ToolParser(ToolParser):
# check to see if the name is defined and has been sent. if so, # check to see if the name is defined and has been sent. if so,
# stream the name - otherwise keep waiting # stream the name - otherwise keep waiting
# finish by setting old and returning None as base case # finish by setting old and returning None as base case
tool_call_arr["arguments"] = self.get_argments(tool_call_arr) tool_call_arr["arguments"] = self.get_arguments(tool_call_arr)
self.prev_tool_call_arr = [tool_call_arr] self.prev_tool_call_arr = [tool_call_arr]
return delta return delta
except Exception: except Exception:
......
...@@ -1202,7 +1202,7 @@ class LinearScalingRotaryEmbeddingWithLoRA(BaseLayerWithLoRA): ...@@ -1202,7 +1202,7 @@ class LinearScalingRotaryEmbeddingWithLoRA(BaseLayerWithLoRA):
multiple LoRA adapters with a specialized kernel. multiple LoRA adapters with a specialized kernel.
Replace LinearScalingRotaryEmbedding with MultiLinearScalingRotaryEmbedding Replace LinearScalingRotaryEmbedding with MultiLinearScalingRotaryEmbedding
which can handle multi lora adapters in a specialied kernel. which can handle multi lora adapters in a specialized kernel.
""" """
def __init__(self, base_layer: RotaryEmbedding) -> None: def __init__(self, base_layer: RotaryEmbedding) -> None:
......
...@@ -68,11 +68,11 @@ def convert_mapping( ...@@ -68,11 +68,11 @@ def convert_mapping(
LoRA indices. LoRA indices.
sampler_indices: Tensor of shape [batch_size] mapping requests to sampler_indices: Tensor of shape [batch_size] mapping requests to
LoRA indices for sampler. For generation, this will be the LoRA indices for sampler. For generation, this will be the
same as base_indicies. For prefill, this will map requests same as base_indices. For prefill, this will map requests
to LoRA indices. to LoRA indices.
sampler_indices_padded: Tensor of shape [batch_size] mapping sampler_indices_padded: Tensor of shape [batch_size] mapping
requests to LoRA indices for sampler with padding. requests to LoRA indices for sampler with padding.
Same as sampler_indicies, but -1 is replaced with Same as sampler_indices, but -1 is replaced with
max_loras. max_loras.
embeddings_indices: Tensor of shape [2, batch_size] mapping embeddings_indices: Tensor of shape [2, batch_size] mapping
requests to embedding indices. First row is for embeddings requests to embedding indices. First row is for embeddings
......
...@@ -319,7 +319,7 @@ class MambaMixer2(CustomOp): ...@@ -319,7 +319,7 @@ class MambaMixer2(CustomOp):
n_groups == 1, # if there was only one group n_groups == 1, # if there was only one group
) )
intermediate_settings = (intermediate_size, 0, False) intermediate_settings = (intermediate_size, 0, False)
head_setings = (self.num_heads, 0, False) head_settings = (self.num_heads, 0, False)
# - the weight already has a "weight_loader" attribute # - the weight already has a "weight_loader" attribute
# which set_weight_attrs will raise if we do not # which set_weight_attrs will raise if we do not
...@@ -372,7 +372,7 @@ class MambaMixer2(CustomOp): ...@@ -372,7 +372,7 @@ class MambaMixer2(CustomOp):
intermediate_settings, intermediate_settings,
group_shard_settings, group_shard_settings,
group_shard_settings, group_shard_settings,
head_setings, # for dt head_settings, # for dt
], ],
self.tp_size, self.tp_size,
tp_rank, tp_rank,
......
...@@ -516,7 +516,7 @@ def _chunk_state_varlen_kernel( ...@@ -516,7 +516,7 @@ def _chunk_state_varlen_kernel(
offs_n[None, :] * stride_chunk_states_dstate) offs_n[None, :] * stride_chunk_states_dstate)
else: else:
# - this seems repetitve, buts its to help the compiler # - this seems repetitive, buts its to help the compiler
if start_idx < pid_c * chunk_size: if start_idx < pid_c * chunk_size:
past_states_ptrs = chunk_states_ptr + ( past_states_ptrs = chunk_states_ptr + (
offs_m[:, None] * stride_chunk_states_hdim + offs_m[:, None] * stride_chunk_states_hdim +
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment