Unverified commit cf069aa8, authored by Harry Mellor, committed by GitHub
Browse files

Update deprecated Python 3.8 typing (#13971)

parent bf33700e
......@@ -9,7 +9,6 @@ import subprocess
import sys
from pathlib import Path
from shutil import which
from typing import Dict, List
import torch
from packaging.version import Version, parse
......@@ -78,7 +77,7 @@ class CMakeExtension(Extension):
class cmake_build_ext(build_ext):
# A dict of extension directories that have been configured.
did_config: Dict[str, bool] = {}
did_config: dict[str, bool] = {}
#
# Determine number of compilation jobs and optionally nvcc compile threads.
......@@ -548,10 +547,10 @@ def get_vllm_version() -> str:
return version
def get_requirements() -> List[str]:
def get_requirements() -> list[str]:
"""Get Python package dependencies from requirements.txt."""
def _read_requirements(filename: str) -> List[str]:
def _read_requirements(filename: str) -> list[str]:
with open(get_path(filename)) as f:
requirements = f.read().strip().split("\n")
resolved_requirements = []
......
# SPDX-License-Identifier: Apache-2.0
"""vllm.entrypoints.api_server with some extra logging for testing."""
from typing import Any, Dict, Iterable
from collections.abc import Iterable
from typing import Any
import uvicorn
from fastapi.responses import JSONResponse, Response
......@@ -24,7 +25,7 @@ class AsyncLLMEngineWithStats(AsyncLLMEngine):
self._num_aborts += len(ids)
await super()._engine_abort(ids)
def testing_stats(self) -> Dict[str, Any]:
def testing_stats(self) -> dict[str, Any]:
return {"num_aborted_requests": self._num_aborts}
......
......@@ -6,7 +6,7 @@ import uuid
from asyncio import CancelledError
from copy import copy
from dataclasses import dataclass
from typing import List, Optional
from typing import Optional
import pytest
import pytest_asyncio
......@@ -254,7 +254,7 @@ async def test_output_kinds(async_engine, stop):
params.output_kind = RequestOutputKind.DELTA
prompt_tokens = None
output_tokens: List[int] = []
output_tokens: list[int] = []
output_text = ""
output_count = 0
final_output = None
......
......@@ -8,7 +8,7 @@ if the config `tractable_init` is set to True. Otherwise, the weights are
initialized randomly with a fixed seed.
"""
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple
from typing import Any, Optional
import torch
from torch import nn
......@@ -56,7 +56,7 @@ class LlamaConfig:
random_seed: int = 0
def compute_hash(self) -> str:
factors: List[Any] = []
factors: list[Any] = []
for k, v in self.__dict__.items():
if k == "random_seed":
continue
......@@ -174,7 +174,7 @@ class LlamaDecoderLayer(nn.Module):
positions: torch.Tensor,
hidden_states: torch.Tensor,
residual: Optional[torch.Tensor],
) -> Tuple[torch.Tensor, torch.Tensor]:
) -> tuple[torch.Tensor, torch.Tensor]:
"""
For tractable computation:
- if residual is None, the outputs are:
......
# SPDX-License-Identifier: Apache-2.0
import dataclasses
from typing import Dict, List, Optional
from typing import Optional
import pytest
......@@ -14,7 +14,7 @@ from ..utils import compare_all_settings
@dataclasses.dataclass
class TestSetting:
model: str
model_args: List[str]
model_args: list[str]
pp_size: int
tp_size: int
attn_backend: str
......@@ -108,8 +108,8 @@ def test_compile_correctness(test_setting: TestSetting):
final_args = ["--enforce-eager"] + model_args + ["-pp", str(pp_size)] + \
["-tp", str(tp_size)]
all_args: List[List[str]] = []
all_envs: List[Optional[Dict[str, str]]] = []
all_args: list[list[str]] = []
all_envs: list[Optional[dict[str, str]]] = []
for level in [
CompilationLevel.NO_COMPILATION,
......
......@@ -5,8 +5,7 @@ import os
import tempfile
from collections import UserList
from enum import Enum
from typing import (Any, Callable, Dict, List, Optional, Tuple, Type,
TypedDict, TypeVar, Union)
from typing import Any, Callable, Optional, TypedDict, TypeVar, Union
import numpy as np
import pytest
......@@ -47,14 +46,14 @@ _SYS_MSG = os.path.join(_TEST_DIR, "system_messages", "sonnet3.5_nov2024.txt")
_M = TypeVar("_M")
_PromptMultiModalInput = Union[List[_M], List[List[_M]]]
_PromptMultiModalInput = Union[list[_M], list[list[_M]]]
PromptImageInput = _PromptMultiModalInput[Image.Image]
PromptAudioInput = _PromptMultiModalInput[Tuple[np.ndarray, int]]
PromptAudioInput = _PromptMultiModalInput[tuple[np.ndarray, int]]
PromptVideoInput = _PromptMultiModalInput[np.ndarray]
def _read_prompts(filename: str) -> List[str]:
def _read_prompts(filename: str) -> list[str]:
with open(filename) as f:
prompts = f.readlines()
return prompts
......@@ -77,7 +76,7 @@ class _ImageAssets(_ImageAssetsBase):
ImageAsset("cherry_blossom"),
])
def prompts(self, prompts: _ImageAssetPrompts) -> List[str]:
def prompts(self, prompts: _ImageAssetPrompts) -> list[str]:
"""
Convenience method to define the prompt for each test image.
......@@ -102,7 +101,7 @@ class _VideoAssets(_VideoAssetsBase):
VideoAsset("sample_demo_1.mp4"),
])
def prompts(self, prompts: _VideoAssetPrompts) -> List[str]:
def prompts(self, prompts: _VideoAssetPrompts) -> list[str]:
return [prompts["sample_demo_1"]]
......@@ -175,7 +174,7 @@ def dynamo_reset():
@pytest.fixture
def example_prompts() -> List[str]:
def example_prompts() -> list[str]:
prompts = []
for filename in _TEST_PROMPTS:
prompts += _read_prompts(filename)
......@@ -197,7 +196,7 @@ class DecoderPromptType(Enum):
@pytest.fixture
def example_encoder_decoder_prompts(
) -> Dict[DecoderPromptType, List[ExplicitEncoderDecoderPrompt]]:
) -> dict[DecoderPromptType, list[ExplicitEncoderDecoderPrompt]]:
'''
Returns an encoder prompt list and a decoder prompt list, wherein each pair
of same-index entries in both lists corresponds to an (encoder prompt,
......@@ -229,7 +228,7 @@ def example_encoder_decoder_prompts(
@pytest.fixture
def example_long_prompts() -> List[str]:
def example_long_prompts() -> list[str]:
prompts = []
for filename in _LONG_PROMPTS:
prompts += _read_prompts(filename)
......@@ -273,11 +272,11 @@ class HfRunner:
model_name: str,
dtype: str = "half",
*,
model_kwargs: Optional[Dict[str, Any]] = None,
model_kwargs: Optional[dict[str, Any]] = None,
is_sentence_transformer: bool = False,
is_cross_encoder: bool = False,
skip_tokenizer_init: bool = False,
auto_cls: Type[_BaseAutoModelClass] = AutoModelForCausalLM,
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
postprocess_inputs: Callable[..., BatchEncoding] = identity,
) -> None:
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
......@@ -334,11 +333,11 @@ class HfRunner:
def get_inputs(
self,
prompts: List[str],
prompts: list[str],
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
) -> List[BatchEncoding]:
) -> list[BatchEncoding]:
if images is not None:
assert len(prompts) == len(images)
......@@ -348,9 +347,9 @@ class HfRunner:
if audios is not None:
assert len(prompts) == len(audios)
all_inputs: List[BatchEncoding] = []
all_inputs: list[BatchEncoding] = []
for i, prompt in enumerate(prompts):
processor_kwargs: Dict[str, Any] = {
processor_kwargs: dict[str, Any] = {
"text": prompt,
"return_tensors": "pt",
}
......@@ -370,7 +369,7 @@ class HfRunner:
return all_inputs
def classify(self, prompts: List[str]) -> List[str]:
def classify(self, prompts: list[str]) -> list[str]:
# output is final logits
all_inputs = self.get_inputs(prompts)
outputs = []
......@@ -383,18 +382,18 @@ class HfRunner:
def generate(
self,
prompts: List[str],
prompts: list[str],
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[Tuple[List[List[int]], List[str]]]:
) -> list[tuple[list[list[int]], list[str]]]:
all_inputs = self.get_inputs(prompts,
images=images,
videos=videos,
audios=audios)
outputs: List[Tuple[List[List[int]], List[str]]] = []
outputs: list[tuple[list[list[int]], list[str]]] = []
for inputs in all_inputs:
output_ids = self.model.generate(
**self.wrap_device(inputs, device=self.model.device.type),
......@@ -412,13 +411,13 @@ class HfRunner:
def generate_greedy(
self,
prompts: List[str],
prompts: list[str],
max_tokens: int,
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[Tuple[List[int], str]]:
) -> list[tuple[list[int], str]]:
outputs = self.generate(prompts,
do_sample=False,
max_new_tokens=max_tokens,
......@@ -432,10 +431,10 @@ class HfRunner:
def generate_beam_search(
self,
prompts: List[str],
prompts: list[str],
beam_width: int,
max_tokens: int,
) -> List[Tuple[List[List[int]], List[str]]]:
) -> list[tuple[list[list[int]], list[str]]]:
outputs = self.generate(prompts,
do_sample=False,
max_new_tokens=max_tokens,
......@@ -453,19 +452,19 @@ class HfRunner:
def generate_greedy_logprobs(
self,
prompts: List[str],
prompts: list[str],
max_tokens: int,
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[List[torch.Tensor]]:
) -> list[list[torch.Tensor]]:
all_inputs = self.get_inputs(prompts,
images=images,
videos=videos,
audios=audios)
all_logprobs: List[List[torch.Tensor]] = []
all_logprobs: list[list[torch.Tensor]] = []
for inputs in all_inputs:
output = self.model.generate(
**self.wrap_device(inputs, device=self.model.device.type),
......@@ -483,11 +482,11 @@ class HfRunner:
def _hidden_states_to_seq_logprobs(
self,
hidden_states: Tuple[Tuple[torch.Tensor, ...], ...],
) -> List[torch.Tensor]:
hidden_states: tuple[tuple[torch.Tensor, ...], ...],
) -> list[torch.Tensor]:
output_embeddings = self.model.get_output_embeddings()
seq_logprobs: List[torch.Tensor] = []
seq_logprobs: list[torch.Tensor] = []
for _, hidden_state in enumerate(hidden_states):
last_hidden_states = hidden_state[-1][0]
logits = torch.matmul(
......@@ -503,14 +502,14 @@ class HfRunner:
def _hidden_states_to_logprobs(
self,
hidden_states: Tuple[Tuple[torch.Tensor, ...], ...],
hidden_states: tuple[tuple[torch.Tensor, ...], ...],
num_logprobs: int,
) -> Tuple[List[Dict[int, float]], int]:
) -> tuple[list[dict[int, float]], int]:
seq_logprobs = self._hidden_states_to_seq_logprobs(hidden_states)
output_len = len(hidden_states)
# convert to dict
seq_logprobs_lst: List[Dict[int, float]] = []
seq_logprobs_lst: list[dict[int, float]] = []
for tok_idx, tok_logprobs in enumerate(seq_logprobs):
# drop prompt logprobs
if tok_idx == 0:
......@@ -530,22 +529,22 @@ class HfRunner:
def generate_greedy_logprobs_limit(
self,
prompts: List[str],
prompts: list[str],
max_tokens: int,
num_logprobs: int,
images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None,
**kwargs: Any,
) -> List[TokensTextLogprobs]:
) -> list[TokensTextLogprobs]:
all_inputs = self.get_inputs(prompts,
images=images,
videos=videos,
audios=audios)
all_logprobs: List[List[Dict[int, float]]] = []
all_output_ids: List[List[int]] = []
all_output_strs: List[str] = []
all_logprobs: list[list[dict[int, float]]] = []
all_output_ids: list[list[int]] = []
all_output_strs: list[str] = []
for inputs in all_inputs:
output = self.model.generate(
......@@ -577,23 +576,23 @@ class HfRunner:
def generate_encoder_decoder_greedy_logprobs_limit(
self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
max_tokens: int,
num_logprobs: int,
images: Optional[PromptImageInput] = None,
**kwargs: Any,
) -> List[TokensTextLogprobs]:
) -> list[TokensTextLogprobs]:
'''
Greedy logprobs generation for vLLM encoder/decoder models
'''
all_logprobs: List[List[Dict[int, float]]] = []
all_output_ids: List[List[int]] = []
all_output_strs: List[str] = []
all_logprobs: list[list[dict[int, float]]] = []
all_output_ids: list[list[int]] = []
all_output_strs: list[str] = []
for i, (encoder_prompt, decoder_prompt) in enumerate(
to_enc_dec_tuple_list(encoder_decoder_prompts)):
processor_kwargs: Dict[str, Any] = {
processor_kwargs: dict[str, Any] = {
"text": encoder_prompt,
"return_tensors": "pt",
}
......@@ -641,10 +640,10 @@ class HfRunner:
return [(output_ids, output_str, output_logprobs)
for output_ids, output_str, output_logprobs in outputs]
def encode(self, prompts: List[str]) -> List[List[torch.Tensor]]:
def encode(self, prompts: list[str]) -> list[list[torch.Tensor]]:
return self.model.encode(prompts)
def predict(self, prompts: List[List[str]]) -> torch.Tensor:
def predict(self, prompts: list[list[str]]) -> torch.Tensor:
return self.model.predict(prompts, convert_to_tensor=True)
def __enter__(self):
......@@ -699,11 +698,11 @@ class VllmRunner:
def get_inputs(
self,
prompts: List[str],
prompts: list[str],
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
) -> List[TextPrompt]:
) -> list[TextPrompt]:
if images is not None:
assert len(prompts) == len(images)
......@@ -733,13 +732,13 @@ class VllmRunner:
def generate(
self,
prompts: List[str],
prompts: list[str],
sampling_params: SamplingParams,
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[Tuple[List[List[int]], List[str]]]:
) -> list[tuple[list[list[int]], list[str]]]:
inputs = self.get_inputs(prompts,
images=images,
videos=videos,
......@@ -749,12 +748,12 @@ class VllmRunner:
sampling_params=sampling_params,
**kwargs)
outputs: List[Tuple[List[List[int]], List[str]]] = []
outputs: list[tuple[list[list[int]], list[str]]] = []
for req_output in req_outputs:
prompt_str = req_output.prompt
prompt_ids = req_output.prompt_token_ids
req_sample_output_ids: List[List[int]] = []
req_sample_output_strs: List[str] = []
req_sample_output_ids: list[list[int]] = []
req_sample_output_strs: list[str] = []
for sample in req_output.outputs:
output_str = sample.text
output_ids = list(sample.token_ids)
......@@ -765,9 +764,9 @@ class VllmRunner:
@staticmethod
def _final_steps_generate_w_logprobs(
req_outputs: List[RequestOutput],
) -> List[TokensTextLogprobsPromptLogprobs]:
outputs: List[TokensTextLogprobsPromptLogprobs] = []
req_outputs: list[RequestOutput],
) -> list[TokensTextLogprobsPromptLogprobs]:
outputs: list[TokensTextLogprobsPromptLogprobs] = []
for req_output in req_outputs:
assert len(req_output.outputs) > 0
for sample in req_output.outputs:
......@@ -780,14 +779,14 @@ class VllmRunner:
def generate_w_logprobs(
self,
prompts: List[str],
prompts: list[str],
sampling_params: SamplingParams,
images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None,
**kwargs: Any,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
) -> Union[list[TokensTextLogprobs],
list[TokensTextLogprobsPromptLogprobs]]:
inputs = self.get_inputs(prompts,
images=images,
videos=videos,
......@@ -806,10 +805,10 @@ class VllmRunner:
def generate_encoder_decoder_w_logprobs(
self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
sampling_params: SamplingParams,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
) -> Union[list[TokensTextLogprobs],
list[TokensTextLogprobsPromptLogprobs]]:
'''
Logprobs generation for vLLM encoder/decoder models
'''
......@@ -826,13 +825,13 @@ class VllmRunner:
def generate_greedy(
self,
prompts: List[str],
prompts: list[str],
max_tokens: int,
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
**kwargs: Any,
) -> List[Tuple[List[int], str]]:
) -> list[tuple[list[int], str]]:
greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
outputs = self.generate(prompts,
greedy_params,
......@@ -845,18 +844,18 @@ class VllmRunner:
def generate_greedy_logprobs(
self,
prompts: List[str],
prompts: list[str],
max_tokens: int,
num_logprobs: int,
num_prompt_logprobs: Optional[int] = None,
images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None,
stop_token_ids: Optional[List[int]] = None,
stop: Optional[List[str]] = None,
stop_token_ids: Optional[list[int]] = None,
stop: Optional[list[str]] = None,
**kwargs: Any,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
) -> Union[list[TokensTextLogprobs],
list[TokensTextLogprobsPromptLogprobs]]:
greedy_logprobs_params = SamplingParams(
temperature=0.0,
max_tokens=max_tokens,
......@@ -874,12 +873,12 @@ class VllmRunner:
def generate_encoder_decoder_greedy_logprobs(
self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
max_tokens: int,
num_logprobs: int,
num_prompt_logprobs: Optional[int] = None,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
) -> Union[list[TokensTextLogprobs],
list[TokensTextLogprobsPromptLogprobs]]:
greedy_logprobs_params = SamplingParams(
temperature=0.0,
max_tokens=max_tokens,
......@@ -895,10 +894,10 @@ class VllmRunner:
def generate_beam_search(
self,
prompts: Union[List[str], List[List[int]]],
prompts: Union[list[str], list[list[int]]],
beam_width: int,
max_tokens: int,
) -> List[Tuple[List[List[int]], List[str]]]:
) -> list[tuple[list[list[int]], list[str]]]:
if is_list_of(prompts, str, check="all"):
prompts = [TextPrompt(prompt=prompt) for prompt in prompts]
else:
......@@ -915,17 +914,17 @@ class VllmRunner:
returned_outputs.append((token_ids, texts))
return returned_outputs
def classify(self, prompts: List[str]) -> List[List[float]]:
def classify(self, prompts: list[str]) -> list[list[float]]:
req_outputs = self.model.classify(prompts)
return [req_output.outputs.probs for req_output in req_outputs]
def encode(
self,
prompts: List[str],
prompts: list[str],
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
) -> List[List[float]]:
) -> list[list[float]]:
inputs = self.get_inputs(prompts,
images=images,
videos=videos,
......@@ -936,9 +935,9 @@ class VllmRunner:
def score(
self,
text_1: Union[str, List[str]],
text_2: Union[str, List[str]],
) -> List[float]:
text_1: Union[str, list[str]],
text_2: Union[str, list[str]],
) -> list[float]:
req_outputs = self.model.score(text_1, text_2)
return [req_output.outputs.score for req_output in req_outputs]
......
# SPDX-License-Identifier: Apache-2.0
from typing import Callable, Iterable, Optional
from collections.abc import Iterable
from typing import Callable, Optional
import pytest
......
# SPDX-License-Identifier: Apache-2.0
import random
from typing import List
import pytest
......@@ -137,9 +136,9 @@ def prep_prompts(batch_size: int):
The prompt is just under 10k tokens; sliding window is 4k
so the answer is outside sliding window, but should still be correct.
"""
prompts: List[str] = []
answer: List[int] = []
indices: List[int] = []
prompts: list[str] = []
answer: list[int] = []
indices: list[int] = []
random.seed(1)
for _ in range(batch_size):
idx = random.randint(30, 90)
......@@ -158,7 +157,7 @@ def prep_prompts(batch_size: int):
return prompts, answer, indices
def check_answers(indices: List[int], answer: List[int], outputs: List[str]):
def check_answers(indices: list[int], answer: list[int], outputs: list[str]):
answer2 = [int(text[0:2].strip()) for text in outputs]
print(list(zip(indices, zip(answer, answer2))))
numok = 0
......@@ -170,7 +169,7 @@ def check_answers(indices: List[int], answer: List[int], outputs: List[str]):
assert frac_ok > 0.7
def check_window(prompts: List[str]):
def check_window(prompts: list[str]):
def inner(llm: LLM):
sliding_window = llm.llm_engine.model_config.get_sliding_window()
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest
from vllm.core.block.block_table import BlockTable
......@@ -32,7 +30,7 @@ def test_allocate_naive(block_size: int, sequence_len: int):
token_ids = list(range(sequence_len))
num_blocks_per_alloc = len(list(chunk_list(token_ids, block_size)))
block_tables: List[BlockTable] = []
block_tables: list[BlockTable] = []
for i in range(5):
assert allocator.get_num_free_blocks(
device=Device.GPU) == num_gpu_blocks - i * num_blocks_per_alloc
......@@ -77,7 +75,7 @@ def test_allocate_prefix_caching(block_size: int, sequence_len: int):
num_immutable_blocks_per_alloc = len(
chunked_tokens) - num_mutable_blocks_per_alloc
block_tables: List[BlockTable] = []
block_tables: list[BlockTable] = []
for alloc_i in range(1, 6):
block_tables.append(
......@@ -272,7 +270,7 @@ def test_append_token_ids_correct_content(block_size: int, sequence_len: int,
)
block_table.allocate(token_ids=token_ids, device=Device.GPU)
appended_so_far: List[int] = []
appended_so_far: list[int] = []
for append in chunk_list(token_ids_to_append, append_size):
block_table.append_token_ids(append)
appended_so_far.extend(append)
......
# SPDX-License-Identifier: Apache-2.0
from typing import List, Optional
from typing import Optional
import pytest
......@@ -14,7 +14,7 @@ class TestNaiveBlockAllocator:
def create_allocate_lambda(allocate_type: str,
allocator: NaiveBlockAllocator,
prev_block: Optional[Block],
token_ids: List[int]):
token_ids: list[int]):
if allocate_type == "immutable":
allocate_block = lambda: allocator.allocate_immutable_block(
prev_block=prev_block, token_ids=token_ids)
......
......@@ -2,7 +2,7 @@
import math
import random
from typing import List, Optional
from typing import Optional
from unittest.mock import MagicMock
import pytest
......@@ -123,11 +123,11 @@ class TestPrefixCachingBlock:
@staticmethod
def create_chain(block_size: int,
token_ids: List[int],
num_empty_trailing_blocks=0) -> List[PrefixCachingBlock]:
token_ids: list[int],
num_empty_trailing_blocks=0) -> list[PrefixCachingBlock]:
"""Helper method which creates a chain of blocks.
"""
blocks: List[PrefixCachingBlock] = []
blocks: list[PrefixCachingBlock] = []
num_blocks = math.ceil(
len(token_ids) / block_size) + num_empty_trailing_blocks
......@@ -161,7 +161,7 @@ class TestPrefixCachingBlockAllocator:
@staticmethod
def create_allocate_lambda(allocate_type: str, allocator: BlockAllocator,
prev_block: Optional[Block],
token_ids: List[int]):
token_ids: list[int]):
if allocate_type == "immutable":
allocate_block = lambda: allocator.allocate_immutable_block(
prev_block=prev_block, token_ids=token_ids)
......@@ -839,13 +839,13 @@ class TestPrefixCachingBlockAllocator:
@staticmethod
def create_immutable_chain(
block_size: int,
token_ids: List[int],
token_ids: list[int],
allocator: PrefixCachingBlockAllocator,
extra_hash: Optional[int] = None,
) -> List[PrefixCachingBlock]:
) -> list[PrefixCachingBlock]:
"""Helper method which creates a chain of blocks.
"""
blocks: List[Block] = []
blocks: list[Block] = []
num_blocks = math.ceil(len(token_ids) / block_size)
if num_blocks == 0:
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
from unittest.mock import MagicMock
import pytest # noqa
......@@ -46,7 +45,7 @@ def test_simple():
cache_config.num_cpu_blocks = 8
cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(num_seq_group):
......@@ -93,7 +92,7 @@ def test_chunk():
cache_config.num_cpu_blocks = 32
cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(2):
......@@ -145,7 +144,7 @@ def test_concurrent_chunking():
cache_config.num_cpu_blocks = 32
cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(2):
......@@ -226,8 +225,8 @@ def test_short_prompts_jump_long_prompts_in_queue():
cache_config.num_cpu_blocks = 3200 # large KV cache size for large requests
cache_config.num_gpu_blocks = 3200
scheduler = Scheduler(scheduler_config, cache_config, None)
long_seqs: List[SequenceGroup] = []
short_seqs: List[SequenceGroup] = []
long_seqs: list[SequenceGroup] = []
short_seqs: list[SequenceGroup] = []
# Add 2 large seq groups to scheduler.
for i in range(2):
......@@ -368,7 +367,7 @@ def test_complex():
cache_config.num_cpu_blocks = 64
cache_config.num_gpu_blocks = 64
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(2):
......@@ -439,7 +438,7 @@ def test_maximal_decoding():
cache_config.num_cpu_blocks = 8
cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(2):
......@@ -533,7 +532,7 @@ def test_prompt_limit():
cache_config.num_cpu_blocks = 16
cache_config.num_gpu_blocks = 16
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
_, seq_group = create_dummy_prompt("1",
prompt_length=48,
......@@ -565,7 +564,7 @@ def test_prompt_limit_exceed():
cache_config.num_cpu_blocks = 16
cache_config.num_gpu_blocks = 16
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
_, seq_group = create_dummy_prompt("2",
prompt_length=48,
block_size=block_size)
......@@ -699,7 +698,7 @@ def test_chunked_prefill_max_seqs():
cache_config.num_cpu_blocks = 128
cache_config.num_gpu_blocks = 128
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
_, seq_group = create_dummy_prompt("1",
prompt_length=65,
......@@ -758,7 +757,7 @@ def test_prefix_caching():
cache_config.num_cpu_blocks = 0
cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(2):
......@@ -800,7 +799,7 @@ def test_prefix_caching_with_concurrent_partial_prefills():
cache_config.num_cpu_blocks = 0
cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(2):
......
......@@ -2,7 +2,6 @@
import time
from collections import deque
from typing import List, Set, Tuple
from unittest.mock import MagicMock
import pytest # noqa
......@@ -57,7 +56,7 @@ def test_scheduler_abort_seq_group():
# Add multiple seq groups to scheduler.
num_seq_group = 4
request_ids: Set[str] = set()
request_ids: set[str] = set()
for i in range(num_seq_group):
_, seq_group = create_dummy_prompt(str(i), block_size)
scheduler.add_seq_group(seq_group)
......@@ -83,7 +82,7 @@ def test_scheduler_schedule_simple():
cache_config.num_cpu_blocks = 8
cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(num_seq_group):
......@@ -221,7 +220,7 @@ def test_scheduler_max_seqs():
cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None)
all_seq_groups: List[SequenceGroup] = []
all_seq_groups: list[SequenceGroup] = []
# Add seq groups to scheduler.
for i in range(num_seq_group):
_, seq_group = create_dummy_prompt(str(i),
......@@ -480,7 +479,7 @@ def test_prefill_schedule_max_lora():
num_cpu_blocks=64,
num_gpu_blocks=64)
budget = create_token_budget(token_budget=120)
curr_loras: Set[int] = set()
curr_loras: set[int] = set()
for i in range(2):
_, seq_group = create_dummy_prompt(str(i),
prompt_length=60,
......@@ -651,8 +650,8 @@ def test_schedule_swapped_max_loras():
block_size=block_size,
num_cpu_blocks=32,
num_gpu_blocks=32)
curr_loras: Set[int] = set()
blocks_to_swap_out: List[Tuple[int, int]] = []
curr_loras: set[int] = set()
blocks_to_swap_out: list[tuple[int, int]] = []
for i in range(2):
_, seq_group = create_dummy_prompt(str(i),
prompt_length=60,
......@@ -683,7 +682,7 @@ def test_schedule_swapped_cannot_swap_in():
num_cpu_blocks=32,
num_gpu_blocks=32)
curr_loras = None
blocks_to_swap_out: List[Tuple[int, int]] = []
blocks_to_swap_out: list[tuple[int, int]] = []
for i in range(2):
_, seq_group = create_dummy_prompt(str(i),
prompt_length=60,
......@@ -714,7 +713,7 @@ def test_infeasible_swap():
num_cpu_blocks=32,
num_gpu_blocks=32)
curr_loras = None
blocks_to_swap_out: List[Tuple[int, int]] = []
blocks_to_swap_out: list[tuple[int, int]] = []
for i in range(2):
_, seq_group = create_dummy_prompt(str(i),
prompt_length=60,
......@@ -752,7 +751,7 @@ def test_schedule_swapped_blocks_to_copy():
block_size=block_size)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
blocks_to_swap_out: List[Tuple[int, int]] = []
blocks_to_swap_out: list[tuple[int, int]] = []
scheduler._swap_out(seq_group, blocks_to_swap_out)
scheduler._add_seq_group_to_swapped(seq_group)
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest # noqa
from vllm.config import CacheConfig, SchedulerConfig
......@@ -48,7 +46,7 @@ def test_scheduler_schedule_simple_encoder_decoder():
cache_config.num_cpu_blocks = 16 # enc and dec prompts per seq_group
cache_config.num_gpu_blocks = 16 # enc and dec prompts per seq_group
scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = []
running: list[SequenceGroup] = []
# Add seq groups to scheduler.
req_id_list = []
......
......@@ -2,9 +2,8 @@
import time
from collections import defaultdict
from typing import Any, Dict, List, Optional
from typing import Sequence as GenericSequence
from typing import Tuple
from collections.abc import Sequence as GenericSequence
from typing import Any, Optional
from vllm import SamplingParams
from vllm.core.scheduler import Scheduler, SchedulerOutputs
......@@ -20,10 +19,10 @@ def create_dummy_prompt(
block_size: Optional[int] = None,
lora_request: Optional[LoRARequest] = None,
best_of: int = 1,
prompt_tokens: Optional[List[int]] = None,
prompt_tokens: Optional[list[int]] = None,
min_tokens: int = 0,
max_tokens: int = 16,
) -> Tuple[Sequence, SequenceGroup]:
) -> tuple[Sequence, SequenceGroup]:
if not block_size:
block_size = prompt_length
......@@ -48,7 +47,7 @@ def create_dummy_prompt(
return prompt, seq_group
def create_dummy_lora_sequence(request_id: int, token_ids: List[int],
def create_dummy_lora_sequence(request_id: int, token_ids: list[int],
block_size: int, lora_int_id: int) -> Sequence:
return Sequence(seq_id=request_id,
inputs=token_inputs(token_ids),
......@@ -58,7 +57,7 @@ def create_dummy_lora_sequence(request_id: int, token_ids: List[int],
lora_int_id=lora_int_id))
def create_dummy_sequence(request_id: int, token_ids: List[int],
def create_dummy_sequence(request_id: int, token_ids: list[int],
block_size: int) -> Sequence:
return Sequence(
seq_id=request_id,
......@@ -74,7 +73,7 @@ def create_dummy_prompt_encoder_decoder(
block_size: Optional[int] = None,
lora_request: Optional[LoRARequest] = None,
best_of: int = 1,
) -> Tuple[Sequence, Sequence, SequenceGroup]:
) -> tuple[Sequence, Sequence, SequenceGroup]:
if not block_size:
block_size = decoder_prompt_length
......@@ -125,7 +124,7 @@ def create_seq_group(
prompt_token_ids = [0] * seq_prompt_len
seqs: List[Sequence] = []
seqs: list[Sequence] = []
for seq_id_offset, output_len in enumerate(seq_output_lens):
seq = Sequence(
seq_id=seq_id_start + seq_id_offset,
......@@ -241,7 +240,7 @@ class SchedulerProxy:
def __init__(self, scheduler: Scheduler):
self.scheduler_ = scheduler
self.call_history: Dict[str, List[Any]] = defaultdict(list)
self.call_history: dict[str, list[Any]] = defaultdict(list)
def __getattr__(self, name: str) -> Any:
......@@ -253,6 +252,6 @@ class SchedulerProxy:
return wrapper
def last_schedule_ret(
self, ) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs, Any]:
self, ) -> tuple[list[SequenceGroupMetadata], SchedulerOutputs, Any]:
_, _, ret = self.call_history["schedule"][-1]
return ret
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
from typing import List, Literal, NamedTuple, Optional
from typing import Literal, NamedTuple, Optional
import pytest
......@@ -28,8 +28,8 @@ class EPTestOptions(NamedTuple):
@dataclass
class EPTestSettings:
parallel_setups: List[ParallelSetup]
distributed_backends: List[str]
parallel_setups: list[ParallelSetup]
distributed_backends: list[str]
task: TaskOption
test_options: EPTestOptions
......
......@@ -9,7 +9,7 @@ WARNING: This test runs in both single-node (4 GPUs) and multi-node
import json
import os
from dataclasses import dataclass
from typing import List, Literal, NamedTuple, Optional
from typing import Literal, NamedTuple, Optional
import pytest
......@@ -38,14 +38,14 @@ class PPTestOptions(NamedTuple):
@dataclass
class PPTestSettings:
parallel_setups: List[ParallelSetup]
parallel_setups: list[ParallelSetup]
# NOTE: the length of distributed_backends and
# vllm_major_versions should be the same, and they
# are first zipped together to iterate over all
# test settings.
distributed_backends: List[str]
distributed_backends: list[str]
# vllm major version: "0" for V0, "1" for V1
vllm_major_versions: List[str]
vllm_major_versions: list[str]
task: TaskOption
test_options: PPTestOptions
......
......@@ -2,7 +2,6 @@
import multiprocessing
import os
from typing import Dict, List
import pytest
import torch
......@@ -20,9 +19,9 @@ from vllm.utils import update_environment_variables
def distributed_run(fn, world_size):
number_of_processes = world_size
processes: List[multiprocessing.Process] = []
processes: list[multiprocessing.Process] = []
for i in range(number_of_processes):
env: Dict[str, str] = {}
env: dict[str, str] = {}
env['RANK'] = str(i)
env['LOCAL_RANK'] = str(i)
env['WORLD_SIZE'] = str(number_of_processes)
......
......@@ -3,7 +3,6 @@
import multiprocessing
import random
import time
from typing import List
import numpy as np
import torch.distributed as dist
......@@ -13,7 +12,7 @@ from vllm.distributed.utils import StatelessProcessGroup
from vllm.utils import get_ip, get_open_port, update_environment_variables
def get_arrays(n: int, seed: int = 0) -> List[np.ndarray]:
def get_arrays(n: int, seed: int = 0) -> list[np.ndarray]:
np.random.seed(seed)
sizes = np.random.randint(1, 10_000, n)
# on average, each array will have 5k elements
......
......@@ -3,7 +3,7 @@
Run `pytest tests/encoder_decoder/test_e2e_correctness.py`.
"""
from typing import List, Optional, Tuple
from typing import Optional
import pytest
from transformers import AutoModelForSeq2SeqLM
......@@ -22,7 +22,7 @@ LIST_ENC_DEC_SUPPORTED_BACKENDS = [
def vllm_to_hf_output(
vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]],
vllm_output: tuple[list[int], str, Optional[SampleLogprobs]],
decoder_prompt_type: DecoderPromptType,
):
"""Sanitize vllm output to be comparable with hf output."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment