Unverified commit 9d104b5b, authored by Aaron Pham, committed by GitHub
Browse files

[CI/Build] Update Ruff version (#8469)


Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent 6ffa3f31
...@@ -25,10 +25,10 @@ jobs: ...@@ -25,10 +25,10 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install ruff==0.1.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2 pip install -r requirements-lint.txt
- name: Analysing the code with ruff - name: Analysing the code with ruff
run: | run: |
ruff . ruff check .
- name: Spelling check with codespell - name: Spelling check with codespell
run: | run: |
codespell --toml pyproject.toml codespell --toml pyproject.toml
......
...@@ -45,8 +45,7 @@ if __name__ == "__main__": ...@@ -45,8 +45,7 @@ if __name__ == "__main__":
rows = int(math.ceil(len(results) / 2)) rows = int(math.ceil(len(results) / 2))
fig, axs = plt.subplots(rows, 2, figsize=(12, 5 * rows)) fig, axs = plt.subplots(rows, 2, figsize=(12, 5 * rows))
axs = axs.flatten() axs = axs.flatten()
axs_idx = 0 for axs_idx, (shape, data) in enumerate(results.items()):
for shape, data in results.items():
plt.sca(axs[axs_idx]) plt.sca(axs[axs_idx])
df = pd.DataFrame(data) df = pd.DataFrame(data)
sns.lineplot(data=df, sns.lineplot(data=df,
...@@ -59,6 +58,5 @@ if __name__ == "__main__": ...@@ -59,6 +58,5 @@ if __name__ == "__main__":
palette="Dark2") palette="Dark2")
plt.title(f"Shape: {shape}") plt.title(f"Shape: {shape}")
plt.ylabel("time (median, s)") plt.ylabel("time (median, s)")
axs_idx += 1
plt.tight_layout() plt.tight_layout()
plt.savefig("graph_machete_bench.pdf") plt.savefig("graph_machete_bench.pdf")
...@@ -159,7 +159,7 @@ echo 'vLLM codespell: Done' ...@@ -159,7 +159,7 @@ echo 'vLLM codespell: Done'
# Lint specified files # Lint specified files
lint() { lint() {
ruff "$@" ruff check "$@"
} }
# Lint files that differ from main branch. Ignores dirs that are not slated # Lint files that differ from main branch. Ignores dirs that are not slated
...@@ -175,7 +175,7 @@ lint_changed() { ...@@ -175,7 +175,7 @@ lint_changed() {
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
ruff ruff check
fi fi
} }
......
...@@ -42,6 +42,8 @@ ignore = [ ...@@ -42,6 +42,8 @@ ignore = [
"E731", "E731",
# Loop control variable not used within loop body # Loop control variable not used within loop body
"B007", "B007",
# f-string format
"UP032",
] ]
[tool.mypy] [tool.mypy]
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
yapf==0.32.0 yapf==0.32.0
toml==0.10.2 toml==0.10.2
tomli==2.0.1 tomli==2.0.1
ruff==0.1.5 ruff==0.6.5
codespell==2.3.0 codespell==2.3.0
isort==5.13.2 isort==5.13.2
clang-format==18.1.5 clang-format==18.1.5
......
...@@ -158,10 +158,7 @@ def should_do_global_cleanup_after_test(request) -> bool: ...@@ -158,10 +158,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
to initialize torch. to initialize torch.
""" """
if request.node.get_closest_marker("skip_global_cleanup"): return not request.node.get_closest_marker("skip_global_cleanup")
return False
return True
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
......
...@@ -65,10 +65,7 @@ def should_do_global_cleanup_after_test(request) -> bool: ...@@ -65,10 +65,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
to initialize torch. to initialize torch.
""" """
if request.node.get_closest_marker("skip_global_cleanup"): return not request.node.get_closest_marker("skip_global_cleanup")
return False
return True
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
......
...@@ -5,7 +5,7 @@ from vllm.multimodal.base import MultiModalInputs, NestedTensors ...@@ -5,7 +5,7 @@ from vllm.multimodal.base import MultiModalInputs, NestedTensors
def assert_nested_tensors_equal(expected: NestedTensors, def assert_nested_tensors_equal(expected: NestedTensors,
actual: NestedTensors): actual: NestedTensors):
assert type(expected) == type(actual) assert type(expected) == type(actual) # noqa: E721
if isinstance(expected, torch.Tensor): if isinstance(expected, torch.Tensor):
assert torch.equal(expected, actual) assert torch.equal(expected, actual)
else: else:
......
...@@ -66,8 +66,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, ...@@ -66,8 +66,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
hashes.append([]) hashes.append([])
prompts = [prefix + prompt for prompt in sample_prompts] prompts = [prefix + prompt for prompt in sample_prompts]
seq_id = 0 for seq_id, prompt in enumerate(prompts):
for prompt in prompts:
hashes[-1].append([]) hashes[-1].append([])
prompt_token_ids = tokenizer.encode(prompt) prompt_token_ids = tokenizer.encode(prompt)
seq = Sequence(seq_id, seq = Sequence(seq_id,
...@@ -83,8 +82,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, ...@@ -83,8 +82,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
for idx in range(num_blocks): for idx in range(num_blocks):
hashes[-1][-1].append(seq.hash_of_block(idx)) hashes[-1][-1].append(seq.hash_of_block(idx))
seq_id += 1
# Check that hashes made with two prefixes with different first blocks are # Check that hashes made with two prefixes with different first blocks are
# different everywhere. # different everywhere.
for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])): for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])):
......
...@@ -111,7 +111,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist(): ...@@ -111,7 +111,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
configuration occurs.""" configuration occurs."""
with pytest.raises(RuntimeError) as ex_info: with pytest.raises(RuntimeError) as ex_info:
_configure_vllm_root_logger() _configure_vllm_root_logger()
assert ex_info.type == RuntimeError assert ex_info.type == RuntimeError # noqa: E721
assert "File does not exist" in str(ex_info) assert "File does not exist" in str(ex_info)
...@@ -152,7 +152,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json( ...@@ -152,7 +152,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
logging_config_file.name): logging_config_file.name):
with pytest.raises(ValueError) as ex_info: with pytest.raises(ValueError) as ex_info:
_configure_vllm_root_logger() _configure_vllm_root_logger()
assert ex_info.type == ValueError assert ex_info.type == ValueError # noqa: E721
assert "Invalid logging config. Expected Dict, got" in str(ex_info) assert "Invalid logging config. Expected Dict, got" in str(ex_info)
......
...@@ -453,8 +453,7 @@ def test_prepare_decode(batch_size): ...@@ -453,8 +453,7 @@ def test_prepare_decode(batch_size):
# each sequence) in the decode phase # each sequence) in the decode phase
expected_selected_token_indices = [] expected_selected_token_indices = []
selected_token_start_idx = 0 for selected_token_start_idx, seq_len in enumerate(seq_lens):
for seq_len in seq_lens:
# Compute the index offset of the final token in each # Compute the index offset of the final token in each
# sequence's decoded outputs; since a single token is # sequence's decoded outputs; since a single token is
# decoded per iteration per sequence, then the length # decoded per iteration per sequence, then the length
...@@ -463,7 +462,6 @@ def test_prepare_decode(batch_size): ...@@ -463,7 +462,6 @@ def test_prepare_decode(batch_size):
# generated tokens is 0 (i.e. the expected sampling index # generated tokens is 0 (i.e. the expected sampling index
# for a given sequence is just `selected_token_start_idx`) # for a given sequence is just `selected_token_start_idx`)
expected_selected_token_indices.append(selected_token_start_idx) expected_selected_token_indices.append(selected_token_start_idx)
selected_token_start_idx += 1
sampling_metadata = model_input.sampling_metadata sampling_metadata = model_input.sampling_metadata
actual = sampling_metadata.selected_token_indices actual = sampling_metadata.selected_token_indices
......
...@@ -241,10 +241,8 @@ def test_prepare_decode_cuda_graph(batch_size): ...@@ -241,10 +241,8 @@ def test_prepare_decode_cuda_graph(batch_size):
# Verify Sampling # Verify Sampling
expected_selected_token_indices = [] expected_selected_token_indices = []
selected_token_start_idx = 0 for selected_token_start_idx, _ in enumerate(context_lens):
for _ in context_lens:
expected_selected_token_indices.append(selected_token_start_idx) expected_selected_token_indices.append(selected_token_start_idx)
selected_token_start_idx += 1
sampling_metadata = SamplingMetadata.prepare( sampling_metadata = SamplingMetadata.prepare(
seq_group_metadata_list, seq_group_metadata_list,
seq_lens, seq_lens,
......
...@@ -42,7 +42,7 @@ def list_adapters(registered_adapters: Dict[int, Any]) -> Dict[int, Any]: ...@@ -42,7 +42,7 @@ def list_adapters(registered_adapters: Dict[int, Any]) -> Dict[int, Any]:
def get_adapter(adapter_id: int, def get_adapter(adapter_id: int,
registered_adapters: Dict[int, Any]) -> Optional[Any]: registered_adapters: Dict[int, Any]) -> Optional[Any]:
return registered_adapters.get(adapter_id, None) return registered_adapters.get(adapter_id)
## worker functions ## worker functions
......
...@@ -33,10 +33,8 @@ def is_block_tables_empty(block_tables: Union[None, Dict]): ...@@ -33,10 +33,8 @@ def is_block_tables_empty(block_tables: Union[None, Dict]):
""" """
if block_tables is None: if block_tables is None:
return True return True
if isinstance(block_tables, dict) and all( return (isinstance(block_tables, dict)
value is None for value in block_tables.values()): and all(value is None for value in block_tables.values()))
return True
return False
def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int, def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int,
......
...@@ -417,9 +417,7 @@ class PrefixCachingBlockAllocator(BlockAllocator): ...@@ -417,9 +417,7 @@ class PrefixCachingBlockAllocator(BlockAllocator):
def is_block_cached(self, block: Block) -> bool: def is_block_cached(self, block: Block) -> bool:
assert block.content_hash is not None assert block.content_hash is not None
if block.content_hash in self._cached_blocks: return block.content_hash in self._cached_blocks
return True
return False
def promote_to_immutable_block(self, block: Block) -> BlockId: def promote_to_immutable_block(self, block: Block) -> BlockId:
"""Once a mutable block is full, it can be promoted to an immutable """Once a mutable block is full, it can be promoted to an immutable
......
...@@ -399,9 +399,7 @@ class BlockSpaceManagerV2(BlockSpaceManager): ...@@ -399,9 +399,7 @@ class BlockSpaceManagerV2(BlockSpaceManager):
""" """
alloc_status = self._can_swap(seq_group, Device.CPU, alloc_status = self._can_swap(seq_group, Device.CPU,
SequenceStatus.RUNNING) SequenceStatus.RUNNING)
if alloc_status == AllocStatus.OK: return alloc_status == AllocStatus.OK
return True
return False
def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
"""Returns the block id mapping (from GPU to CPU) generated by """Returns the block id mapping (from GPU to CPU) generated by
......
...@@ -1042,7 +1042,7 @@ class AsyncLLMEngine: ...@@ -1042,7 +1042,7 @@ class AsyncLLMEngine:
async def start_profile(self) -> None: async def start_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing # using type instead of isinstance to check to avoid capturing
# inherited classes # inherited classes
if type(self.engine.model_executor) == GPUExecutorAsync: if type(self.engine.model_executor) == GPUExecutorAsync: # noqa: E721
self.engine.model_executor.start_profile() self.engine.model_executor.start_profile()
else: else:
self.engine.model_executor._run_workers("start_profile") self.engine.model_executor._run_workers("start_profile")
...@@ -1050,7 +1050,7 @@ class AsyncLLMEngine: ...@@ -1050,7 +1050,7 @@ class AsyncLLMEngine:
async def stop_profile(self) -> None: async def stop_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing # using type instead of isinstance to check to avoid capturing
# inherited classes # inherited classes
if type(self.engine.model_executor) == GPUExecutorAsync: if type(self.engine.model_executor) == GPUExecutorAsync: # noqa: E721
self.engine.model_executor.stop_profile() self.engine.model_executor.stop_profile()
else: else:
self.engine.model_executor._run_workers("stop_profile") self.engine.model_executor._run_workers("stop_profile")
...@@ -1605,7 +1605,7 @@ class LLMEngine: ...@@ -1605,7 +1605,7 @@ class LLMEngine:
def start_profile(self) -> None: def start_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing # using type instead of isinstance to check to avoid capturing
# inherited classes (MultiprocessingGPUExecutor) # inherited classes (MultiprocessingGPUExecutor)
if type(self.model_executor) == GPUExecutor: if type(self.model_executor) == GPUExecutor: # noqa: E721
self.model_executor.start_profile() self.model_executor.start_profile()
else: else:
self.model_executor._run_workers("start_profile") self.model_executor._run_workers("start_profile")
...@@ -1613,7 +1613,7 @@ class LLMEngine: ...@@ -1613,7 +1613,7 @@ class LLMEngine:
def stop_profile(self) -> None: def stop_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing # using type instead of isinstance to check to avoid capturing
# inherited classes (MultiprocessingGPUExecutor) # inherited classes (MultiprocessingGPUExecutor)
if type(self.model_executor) == GPUExecutor: if type(self.model_executor) == GPUExecutor: # noqa: E721
self.model_executor.stop_profile() self.model_executor.stop_profile()
else: else:
self.model_executor._run_workers("stop_profile") self.model_executor._run_workers("stop_profile")
......
...@@ -67,9 +67,9 @@ class BaseLogitsProcessor: ...@@ -67,9 +67,9 @@ class BaseLogitsProcessor:
instruction = self._guide.get_next_instruction( instruction = self._guide.get_next_instruction(
state=self._fsm_state[seq_id]) state=self._fsm_state[seq_id])
if type(instruction) == Generate: if type(instruction) == Generate: # noqa: E721
allowed_tokens = instruction.tokens allowed_tokens = instruction.tokens
elif type(instruction) == Write: elif type(instruction) == Write: # noqa: E721
# TODO: support fast forward tokens # TODO: support fast forward tokens
allowed_tokens = [instruction.tokens[0]] allowed_tokens = [instruction.tokens[0]]
else: else:
......
...@@ -110,9 +110,9 @@ class AWQMarlinConfig(QuantizationConfig): ...@@ -110,9 +110,9 @@ class AWQMarlinConfig(QuantizationConfig):
def is_awq_marlin_compatible(cls, quant_config: Dict[str, Any]): def is_awq_marlin_compatible(cls, quant_config: Dict[str, Any]):
# Extract data from quant config. # Extract data from quant config.
quant_method = quant_config.get("quant_method", "").lower() quant_method = quant_config.get("quant_method", "").lower()
num_bits = quant_config.get("bits", None) num_bits = quant_config.get("bits")
group_size = quant_config.get("group_size", None) group_size = quant_config.get("group_size")
has_zp = quant_config.get("zero_point", None) has_zp = quant_config.get("zero_point")
if quant_method != "awq": if quant_method != "awq":
return False return False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment