Unverified Commit 6a895197 authored by Jiayi Yan's avatar Jiayi Yan Committed by GitHub
Browse files

[Bugfix][CI] fix typos (#34934)


Signed-off-by: default avatar1195343015 <1195343015@qq.com>
Signed-off-by: default avatarJiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: default avatarHarry Mellor <19981378+hmellor@users.noreply.github.com>
parent 8c760b6a
......@@ -162,7 +162,7 @@ Ns = [1024]
TOPKs = [4, 1]
Es = [32]
DTYPEs = [torch.bfloat16]
FUSED_MOE_CHUNK_SIZEs = [None, 16]
FUSED_MOE_CHUNK_SIZES = [None, 16]
def is_nyi_config(config: Config) -> bool:
......@@ -192,7 +192,7 @@ def generate_valid_test_cases(
DTYPEs,
MK_QUANT_CONFIGS,
product(prepare_finalize_types, MK_FUSED_EXPERT_TYPES),
FUSED_MOE_CHUNK_SIZEs,
FUSED_MOE_CHUNK_SIZES,
):
total = total + 1
......@@ -266,7 +266,7 @@ def test_modular_kernel_combinations_multigpu(
if cuda_device_count_stateless() < world_size:
pytest.skip(
f"Not enough GPUs available to run, got "
f"{cuda_device_count_stateless()} exepected "
f"{cuda_device_count_stateless()} expected "
f"{world_size}."
)
......
......@@ -87,7 +87,7 @@ MSGS = [
{
"role": "user",
"content": "Could you please rewrite the below article? \n\n My English needs "
"improvving, maybe I make errors.",
"improving, maybe I make errors.",
},
{
"role": "assistant",
......@@ -98,7 +98,7 @@ MSGS = [
"type": "function",
"function": {
"name": "rewrite",
"arguments": '{"text":"My English needs improvving, maybe '
"arguments": '{"text":"My English needs improving, maybe '
'I make errors."}',
},
}
......
......@@ -14,7 +14,7 @@ MAX_MODEL_LEN = 512
# Example from https://huggingface.co/BAAI/bge-m3
sentences_1 = ["What is BGE M3?", "Defination of BM25"]
sentences_1 = ["What is BGE M3?", "Definition of BM25"]
sentences_2 = [
"BGE M3 is an embedding model supporting dense retrieval, "
"lexical matching and multi-vector interaction.",
......
......@@ -719,7 +719,7 @@ def isaac_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
# Convert to tuple or None
all_hidden_states = tuple(hidden_states_list) if output_hidden_states else None
# Include hiden_states for compatibility with hidden_states_to_seq_logprobs()
# Include hidden_states for compatibility with hidden_states_to_seq_logprobs()
return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=past_key_values,
......@@ -1226,7 +1226,7 @@ def voxtral_patch_hf_runner(hf_model: "HfRunner") -> "HfRunner":
dicts (accepting ``url``, ``path``, or ``base64`` audio) rather than
the standard ``processor(text=, audio=, sampling_rate=)`` interface.
2. HfRunner.get_inputs cannot handle multi-audio per prompt because it
mis-unpacks ``[(arr1, sr1), (arr2, sr2)]`` via a ``len == 2`` check.
incorrectly unpacks ``[(arr1, sr1), (arr2, sr2)]`` via a ``len == 2`` check.
We override ``get_inputs`` to build conversation dicts and call
``apply_chat_template`` directly, bypassing both issues. We also wrap
......
......@@ -25,7 +25,7 @@ def set_test_environment():
os.environ["FLASHINFER_NVCC_THREADS"] = "16"
# Overide the backbone layers to 4 for faster startup
# Override the backbone layers to 4 for faster startup
HF_OVERRIDE_TEXT = {
"num_layers": 4,
"num_hidden_layers": 4,
......
......@@ -206,8 +206,8 @@ def test_resolve_chat_template_kwargs(sample_json_schema, model, expected_kwargs
chat_template_kwargs = {
# both unused
"unsed_kwargs_1": 123,
"unsed_kwargs_2": "abc",
"unused_kwargs_1": 123,
"unused_kwargs_2": "abc",
# should not appear
"chat_template": "{% Hello world! %}",
"tokenize": True,
......
......@@ -853,7 +853,7 @@ def test_vllm_config_defaults_are_none():
@pytest.mark.parametrize(
("model_id", "compiliation_config", "optimization_level"),
("model_id", "compilation_config", "optimization_level"),
[
(
None,
......@@ -895,7 +895,7 @@ def test_vllm_config_defaults_are_none():
("RedHatAI/DeepSeek-V2.5-1210-FP8", CompilationConfig(), OptimizationLevel.O3),
],
)
def test_vllm_config_defaults(model_id, compiliation_config, optimization_level):
def test_vllm_config_defaults(model_id, compilation_config, optimization_level):
"""Test that optimization-level defaults are correctly applied."""
model_config = None
......@@ -903,12 +903,12 @@ def test_vllm_config_defaults(model_id, compiliation_config, optimization_level)
model_config = ModelConfig(model_id)
vllm_config = VllmConfig(
model_config=model_config,
compilation_config=compiliation_config,
compilation_config=compilation_config,
optimization_level=optimization_level,
)
else:
vllm_config = VllmConfig(
compilation_config=compiliation_config,
compilation_config=compilation_config,
optimization_level=optimization_level,
)
# Use the global optimization level defaults
......
......@@ -106,7 +106,7 @@ def test_extract_tool_calls_no_tools(seed_oss_tool_parser):
@pytest.mark.parametrize(
ids=[
"tool_call_0_thinking_budget",
"tool_call_512_thinkg_budget",
"tool_call_512_thinking_budget",
"tool_call_unlimited_thinking_budget",
],
argnames=["model_output", "expected_tool_calls", "expected_content"],
......@@ -308,7 +308,7 @@ def stream_delta_message_generator(
@pytest.mark.parametrize(
ids=[
"tool_call_0_thinking_budget",
"tool_call_512_thinkg_budget",
"tool_call_512_thinking_budget",
"tool_call_unlimited_thinking_budget",
],
argnames=["model_output", "expected_tool_calls", "expected_content"],
......
......@@ -34,10 +34,10 @@ def test_list_filtered_repo_files(
subfolder.mkdir()
(path_tmp_dir / "json_file.json").touch()
(path_tmp_dir / "correct_2.txt").touch()
(path_tmp_dir / "uncorrect.txt").touch()
(path_tmp_dir / "uncorrect.jpeg").touch()
(path_tmp_dir / "incorrect.txt").touch()
(path_tmp_dir / "incorrect.jpeg").touch()
(subfolder / "correct.txt").touch()
(subfolder / "uncorrect_sub.txt").touch()
(subfolder / "incorrect_sub.txt").touch()
def _glob_path() -> list[str]:
return [
......@@ -86,7 +86,7 @@ def test_one_filtered_repo_files(allow_patterns: list[str], expected_bool: bool)
path_tmp_dir = Path(tmp_dir)
subfolder = path_tmp_dir / "subfolder"
subfolder.mkdir()
(path_tmp_dir / "uncorrect.jpeg").touch()
(path_tmp_dir / "incorrect.jpeg").touch()
(subfolder / "correct.txt").touch()
def _glob_path() -> list[str]:
......
......@@ -308,7 +308,7 @@ def test_free_kv_cache_block_queue_append_n():
# Create an empty FreeKVCacheBlockQueue
invalid_queue = FreeKVCacheBlockQueue([])
# set prev_free_block to None and this will cause assertation in append_n
# set prev_free_block to None and this will cause assertion in append_n
invalid_queue.fake_free_list_tail.prev_free_block = None
with pytest.raises(AssertionError):
# Append 1 block
......
......@@ -2304,22 +2304,22 @@ def test_block_lookup_cache_single_block_per_key():
assert cache.get_one_block(key0) is block0
assert cache.get_one_block(key1) is block1
assert cache.get_one_block(key2) is None
# No block poped due to block_id mismatch
# No block popped due to block_id mismatch
assert cache.pop(key0, 100) is None
assert cache.get_one_block(key0) is block0
assert cache.get_one_block(key1) is block1
assert cache.get_one_block(key2) is None
# block poped with (key0, block ID 0)
# block popped with (key0, block ID 0)
assert cache.pop(key0, 0) is block0
assert cache.get_one_block(key0) is None
assert cache.get_one_block(key1) is block1
assert cache.get_one_block(key2) is None
# No block poped due to block_id mismatch
# No block popped due to block_id mismatch
assert cache.pop(key0, 1) is None
assert cache.get_one_block(key0) is None
assert cache.get_one_block(key1) is block1
assert cache.get_one_block(key2) is None
# block poped with (key1, block ID 1)
# block popped with (key1, block ID 1)
assert cache.pop(key1, 1) is block1
assert cache.get_one_block(key0) is None
assert cache.get_one_block(key1) is None
......
......@@ -140,7 +140,7 @@ def _mock_draft_token_ids(
return DraftTokenIds(req_ids=request_ids, draft_token_ids=sampled_token_ids)
def _chech_valid_scheduler_output(
def _check_valid_scheduler_output(
scheduler_output: SchedulerOutput,
seen_request_ids: set[str],
seen_mm_hashes: set[str],
......@@ -242,7 +242,7 @@ def test_priority_scheduling_blast(
)
scheduler.add_request(req)
scheduler_output = scheduler.schedule()
_chech_valid_scheduler_output(
_check_valid_scheduler_output(
scheduler_output, seen_request_ids, seen_mm_hashes
)
model_output = _mock_execute_model(
......
......@@ -1116,7 +1116,7 @@ def _step_until_done(
def _step_until_kv_transfer_finished(scheduler: Scheduler, req_ids: list[str]):
"""Cycle requests through a KV transfer cyle."""
"""Cycle requests through a KV transfer cycle."""
# Requests should first transition to WAITING_FOR_REMOTE_KVS
output = scheduler.schedule()
......@@ -2714,7 +2714,7 @@ def _assert_right_encoder_inputs(
if expected_total_reqs == 0:
return
# Number of expected enocder inputs should match number of requests
# Number of expected encoder inputs should match number of requests
if expected_encoder_inputs:
assert check_exist and requests is not None # only support expect input exist
assert len(requests) == len(expected_encoder_inputs)
......@@ -2964,7 +2964,7 @@ def test_ec_connector_with_partial_cache_hit_multi_round(use_kv_connector):
)
scheduler.update_from_output(output, model_output)
# request1 is finished after outputing 1 token
# request1 is finished after outputting 1 token
# Finish request
scheduler.finish_requests(request1.request_id, RequestStatus.FINISHED_LENGTH_CAPPED)
......@@ -3060,14 +3060,14 @@ def test_ec_connector_schedule_multiple_requests(cache_exist, use_kv_connector):
for request in requests:
scheduler.add_request(request)
# Set up to test different encoder cache exsistence scenario after preemption
# Set up to test different encoder cache existence scenario after preemption
# Order of getting encoder cache should be: local cache -> connector-> compute
scheduler.ec_connector.update_state_after_alloc = Mock(
wraps=scheduler.ec_connector.update_state_after_alloc
)
if cache_exist == "local":
# Allocate cache to cache manager manually to mimick
# Allocate cache to cache manager manually to mimic
for req in requests:
scheduler.encoder_cache_manager.allocate(req, 0)
else:
......@@ -3384,13 +3384,13 @@ def test_priority_scheduling_ec_connector_preemption_and_resumption(
pooler_output=[],
)
# Finish the requests to make room for the preempted requests to resume
# req_high is finished after outputing 2 tokens
# req_high is finished after outputting 2 tokens
scheduler.update_from_output(output, model_output)
scheduler.finish_requests(
request_high.request_id, RequestStatus.FINISHED_LENGTH_CAPPED
)
# Set up to test different encoder cache exsistence scenario after preemption
# Set up to test different encoder cache existence scenario after preemption
# Order of getting encoder cache should be: local cache -> connector-> compute
# By default, the cache should still exist in local in this test case
if cache_exist != "local":
......@@ -3483,7 +3483,7 @@ def test_ec_connector_allocate_encoder_tokens_with_external_load(use_kv_connecto
ec_role="ec_consumer",
)
# Limit the number of availiable slots of EncoderCacheManager
# Limit the number of available slots of EncoderCacheManager
scheduler.encoder_cache_manager = EncoderCacheManager(cache_size=32)
# Create MM request1
......@@ -3574,7 +3574,7 @@ def test_ec_connector_allocate_encoder_tokens_with_external_load(use_kv_connecto
)
scheduler.update_from_output(output, model_output)
# request1 is finished after outputing 1 token
# request1 is finished after outputting 1 token
# Finish request
scheduler.finish_requests(request1.request_id, RequestStatus.FINISHED_LENGTH_CAPPED)
assert scheduler.get_num_unfinished_requests() == 1
......
......@@ -76,11 +76,11 @@ def get_fake_sample_fn() -> SamplerOutput:
),
logprobs_tensors=None,
)
accpeted_tokens = prompt_token_ids[
accepted_tokens = prompt_token_ids[
first_token_id_index : first_token_id_index
+ min(num_accepted_tokens, logits.shape[0])
]
sampled_token_ids = accpeted_tokens
sampled_token_ids = accepted_tokens
return SamplerOutput(
sampled_token_ids=torch.tensor(
[sampled_token_ids], device="cuda", dtype=torch.int32
......
......@@ -911,7 +911,7 @@ def test_structured_output_with_structural_tag(backend: str):
),
)
prompt = "Hello and repete hello 10 times, do not say anything else. Only say hello hello hello, now start"
prompt = "Hello and repeat hello 10 times, do not say anything else. Only say hello hello hello, now start"
outputs = llm.generate(prompt, sampling_params=sampling_params, use_tqdm=True)
assert outputs is not None
for output in outputs:
......
......@@ -99,7 +99,7 @@ def _setup_kv_transfer_request(request, remote_host="127.0.0.1", fake_port=4789)
return request
class FakeMorIIOWrapper:
class FakeMoRIIOWrapper:
# A fake MoRIIOWrapper for testing purposes
def __init__(self, *args, **kwargs):
pass
......@@ -168,7 +168,7 @@ class FakeMorIIOWrapper:
pass
class FakeMorIIOConnectorWorker(MoRIIOConnectorWorker):
class FakeMoRIIOConnectorWorker(MoRIIOConnectorWorker):
# Define a fake remote engine id for testing
REMOTE_ENGINE_ID = "remote_engine"
......@@ -373,7 +373,7 @@ def test_read_mode_loads_remote_block_ids(moriio_read_mode):
# Set remote block ids to be fetched.
request.kv_transfer_params["remote_block_ids"] = block_list
# Remote Prefill, triggers MorIIOConnectorMetadata.
# Remote Prefill, triggers MoRIIOConnectorMetadata.
scheduler_output = scheduler.schedule()
kv_connector_metadata = scheduler_output.kv_connector_metadata
......@@ -451,7 +451,7 @@ def test_register_kv_caches(mock_parallel_groups):
with set_current_vllm_config(vllm_config):
connector = MoRIIOConnector(vllm_config, KVConnectorRole.WORKER)
connector.connector_worker = FakeMorIIOConnectorWorker(
connector.connector_worker = FakeMoRIIOConnectorWorker(
vllm_config, connector.engine_id, hand_shake_latency=0
)
......@@ -528,7 +528,7 @@ def test_moriio_handshake_returns_metadata(mock_parallel_groups):
with (
patch(
"vllm.distributed.kv_transfer.kv_connector.v1.moriio.moriio_engine.MoRIIOWrapper",
FakeMorIIOWrapper,
FakeMoRIIOWrapper,
),
):
handshake_port = _find_free_port()
......
......@@ -460,9 +460,9 @@ class FakeNixlConnectorWorker(NixlConnectorWorker):
# When remote tp_size > local tp_size, handshake with multiple
# remote ranks.
num_hanshakes = 1 if tp_ratio > 0 else -tp_ratio
num_handshakes = 1 if tp_ratio > 0 else -tp_ratio
remote_agents: dict[int, str] = {}
for remote_tp_rank in range(num_hanshakes):
for remote_tp_rank in range(num_handshakes):
remote_agent_name = self.add_remote_agent(
NixlAgentMetadata(
engine_id=self.REMOTE_ENGINE_ID,
......@@ -688,7 +688,7 @@ class TestNixlHandshake:
)
check_handshake(2)
# NOTE flexiblity: a second remote with higher number of ranks is
# NOTE flexibility: a second remote with higher number of ranks is
# discovered. This is not a scenario we actively support right now, but
# the connector allows it.
worker.REMOTE_ENGINE_ID = "remote_engine_2"
......@@ -1766,7 +1766,7 @@ def test_aborted_request_removed_from_worker_in_batch(default_vllm_config, dist_
req = create_request(request_id=1, do_remote_decode=True, max_tokens=1)
scheduler.add_request(req)
# First scheduling pass - examinate build_connector_meta output
# First scheduling pass - examine build_connector_meta output
sched_out = scheduler.schedule()
kv_meta = sched_out.kv_connector_metadata
assert kv_meta is not None
......
......@@ -36,7 +36,7 @@ SAMPLE_PROMPT = BatchLogprobsComposition.SAMPLE_PROMPT
# non-associative and sensitive to batch geometry. The ref LLM (no spec
# decode, default scheduling) and the spec-decode LLM (chunked prefill,
# different effective batch sizes) follow different reduction orders,
# producing numerically divergent logprobs that get mis-attributed to
# producing numerically divergent logprobs that get misattributed to
# spec-decode incorrectness.
#
# Force LLM instances into an identical, deterministic execution
......
......@@ -726,7 +726,7 @@ def test_frequency_penalties(rejection_sampler):
spec_tokens = [[1, 1, 1], [], [1, 1, 1]]
output_tokens = [[1, 1, 1, 1], [7], [1, 1, 1, 1]] # 1, 7 and 1 are the bonus tokens
num_requsts = len(spec_tokens)
num_requests = len(spec_tokens)
logits = create_logits_tensor(output_tokens, token_idx_to_override=15)
metadata = create_sampling_metadata(
all_greedy=True,
......@@ -734,8 +734,8 @@ def test_frequency_penalties(rejection_sampler):
spec_token_ids=spec_tokens,
prompt_token_ids=torch.tensor([[5, 6, 7], [6, 7, 8], [7, 8, 9]], device=DEVICE),
frequency_penalties=[1.5, 1.5, 0.7],
presence_penalties=[0.0] * num_requsts,
repetition_penalties=[1.0] * num_requsts,
presence_penalties=[0.0] * num_requests,
repetition_penalties=[1.0] * num_requests,
)
bonus_token_tensor = torch.tensor(
[output_tokens[i][-1] for i in range(len(output_tokens))], device=logits.device
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment