Unverified Commit cf069aa8 authored by Harry Mellor's avatar Harry Mellor Committed by GitHub
Browse files

Update deprecated Python 3.8 typing (#13971)

parent bf33700e
...@@ -9,7 +9,6 @@ import subprocess ...@@ -9,7 +9,6 @@ import subprocess
import sys import sys
from pathlib import Path from pathlib import Path
from shutil import which from shutil import which
from typing import Dict, List
import torch import torch
from packaging.version import Version, parse from packaging.version import Version, parse
...@@ -78,7 +77,7 @@ class CMakeExtension(Extension): ...@@ -78,7 +77,7 @@ class CMakeExtension(Extension):
class cmake_build_ext(build_ext): class cmake_build_ext(build_ext):
# A dict of extension directories that have been configured. # A dict of extension directories that have been configured.
did_config: Dict[str, bool] = {} did_config: dict[str, bool] = {}
# #
# Determine number of compilation jobs and optionally nvcc compile threads. # Determine number of compilation jobs and optionally nvcc compile threads.
...@@ -548,10 +547,10 @@ def get_vllm_version() -> str: ...@@ -548,10 +547,10 @@ def get_vllm_version() -> str:
return version return version
def get_requirements() -> List[str]: def get_requirements() -> list[str]:
"""Get Python package dependencies from requirements.txt.""" """Get Python package dependencies from requirements.txt."""
def _read_requirements(filename: str) -> List[str]: def _read_requirements(filename: str) -> list[str]:
with open(get_path(filename)) as f: with open(get_path(filename)) as f:
requirements = f.read().strip().split("\n") requirements = f.read().strip().split("\n")
resolved_requirements = [] resolved_requirements = []
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""vllm.entrypoints.api_server with some extra logging for testing.""" """vllm.entrypoints.api_server with some extra logging for testing."""
from typing import Any, Dict, Iterable from collections.abc import Iterable
from typing import Any
import uvicorn import uvicorn
from fastapi.responses import JSONResponse, Response from fastapi.responses import JSONResponse, Response
...@@ -24,7 +25,7 @@ class AsyncLLMEngineWithStats(AsyncLLMEngine): ...@@ -24,7 +25,7 @@ class AsyncLLMEngineWithStats(AsyncLLMEngine):
self._num_aborts += len(ids) self._num_aborts += len(ids)
await super()._engine_abort(ids) await super()._engine_abort(ids)
def testing_stats(self) -> Dict[str, Any]: def testing_stats(self) -> dict[str, Any]:
return {"num_aborted_requests": self._num_aborts} return {"num_aborted_requests": self._num_aborts}
......
...@@ -6,7 +6,7 @@ import uuid ...@@ -6,7 +6,7 @@ import uuid
from asyncio import CancelledError from asyncio import CancelledError
from copy import copy from copy import copy
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional from typing import Optional
import pytest import pytest
import pytest_asyncio import pytest_asyncio
...@@ -254,7 +254,7 @@ async def test_output_kinds(async_engine, stop): ...@@ -254,7 +254,7 @@ async def test_output_kinds(async_engine, stop):
params.output_kind = RequestOutputKind.DELTA params.output_kind = RequestOutputKind.DELTA
prompt_tokens = None prompt_tokens = None
output_tokens: List[int] = [] output_tokens: list[int] = []
output_text = "" output_text = ""
output_count = 0 output_count = 0
final_output = None final_output = None
......
...@@ -8,7 +8,7 @@ if the config `tractable_init` is set to True. Otherwise, the weights are ...@@ -8,7 +8,7 @@ if the config `tractable_init` is set to True. Otherwise, the weights are
initialized randomly with a fixed seed. initialized randomly with a fixed seed.
""" """
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, List, Optional, Tuple from typing import Any, Optional
import torch import torch
from torch import nn from torch import nn
...@@ -56,7 +56,7 @@ class LlamaConfig: ...@@ -56,7 +56,7 @@ class LlamaConfig:
random_seed: int = 0 random_seed: int = 0
def compute_hash(self) -> str: def compute_hash(self) -> str:
factors: List[Any] = [] factors: list[Any] = []
for k, v in self.__dict__.items(): for k, v in self.__dict__.items():
if k == "random_seed": if k == "random_seed":
continue continue
...@@ -174,7 +174,7 @@ class LlamaDecoderLayer(nn.Module): ...@@ -174,7 +174,7 @@ class LlamaDecoderLayer(nn.Module):
positions: torch.Tensor, positions: torch.Tensor,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
residual: Optional[torch.Tensor], residual: Optional[torch.Tensor],
) -> Tuple[torch.Tensor, torch.Tensor]: ) -> tuple[torch.Tensor, torch.Tensor]:
""" """
For tractable computation: For tractable computation:
- if residual is None, the outputs are: - if residual is None, the outputs are:
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import dataclasses import dataclasses
from typing import Dict, List, Optional from typing import Optional
import pytest import pytest
...@@ -14,7 +14,7 @@ from ..utils import compare_all_settings ...@@ -14,7 +14,7 @@ from ..utils import compare_all_settings
@dataclasses.dataclass @dataclasses.dataclass
class TestSetting: class TestSetting:
model: str model: str
model_args: List[str] model_args: list[str]
pp_size: int pp_size: int
tp_size: int tp_size: int
attn_backend: str attn_backend: str
...@@ -108,8 +108,8 @@ def test_compile_correctness(test_setting: TestSetting): ...@@ -108,8 +108,8 @@ def test_compile_correctness(test_setting: TestSetting):
final_args = ["--enforce-eager"] + model_args + ["-pp", str(pp_size)] + \ final_args = ["--enforce-eager"] + model_args + ["-pp", str(pp_size)] + \
["-tp", str(tp_size)] ["-tp", str(tp_size)]
all_args: List[List[str]] = [] all_args: list[list[str]] = []
all_envs: List[Optional[Dict[str, str]]] = [] all_envs: list[Optional[dict[str, str]]] = []
for level in [ for level in [
CompilationLevel.NO_COMPILATION, CompilationLevel.NO_COMPILATION,
......
...@@ -5,8 +5,7 @@ import os ...@@ -5,8 +5,7 @@ import os
import tempfile import tempfile
from collections import UserList from collections import UserList
from enum import Enum from enum import Enum
from typing import (Any, Callable, Dict, List, Optional, Tuple, Type, from typing import Any, Callable, Optional, TypedDict, TypeVar, Union
TypedDict, TypeVar, Union)
import numpy as np import numpy as np
import pytest import pytest
...@@ -47,14 +46,14 @@ _SYS_MSG = os.path.join(_TEST_DIR, "system_messages", "sonnet3.5_nov2024.txt") ...@@ -47,14 +46,14 @@ _SYS_MSG = os.path.join(_TEST_DIR, "system_messages", "sonnet3.5_nov2024.txt")
_M = TypeVar("_M") _M = TypeVar("_M")
_PromptMultiModalInput = Union[List[_M], List[List[_M]]] _PromptMultiModalInput = Union[list[_M], list[list[_M]]]
PromptImageInput = _PromptMultiModalInput[Image.Image] PromptImageInput = _PromptMultiModalInput[Image.Image]
PromptAudioInput = _PromptMultiModalInput[Tuple[np.ndarray, int]] PromptAudioInput = _PromptMultiModalInput[tuple[np.ndarray, int]]
PromptVideoInput = _PromptMultiModalInput[np.ndarray] PromptVideoInput = _PromptMultiModalInput[np.ndarray]
def _read_prompts(filename: str) -> List[str]: def _read_prompts(filename: str) -> list[str]:
with open(filename) as f: with open(filename) as f:
prompts = f.readlines() prompts = f.readlines()
return prompts return prompts
...@@ -77,7 +76,7 @@ class _ImageAssets(_ImageAssetsBase): ...@@ -77,7 +76,7 @@ class _ImageAssets(_ImageAssetsBase):
ImageAsset("cherry_blossom"), ImageAsset("cherry_blossom"),
]) ])
def prompts(self, prompts: _ImageAssetPrompts) -> List[str]: def prompts(self, prompts: _ImageAssetPrompts) -> list[str]:
""" """
Convenience method to define the prompt for each test image. Convenience method to define the prompt for each test image.
...@@ -102,7 +101,7 @@ class _VideoAssets(_VideoAssetsBase): ...@@ -102,7 +101,7 @@ class _VideoAssets(_VideoAssetsBase):
VideoAsset("sample_demo_1.mp4"), VideoAsset("sample_demo_1.mp4"),
]) ])
def prompts(self, prompts: _VideoAssetPrompts) -> List[str]: def prompts(self, prompts: _VideoAssetPrompts) -> list[str]:
return [prompts["sample_demo_1"]] return [prompts["sample_demo_1"]]
...@@ -175,7 +174,7 @@ def dynamo_reset(): ...@@ -175,7 +174,7 @@ def dynamo_reset():
@pytest.fixture @pytest.fixture
def example_prompts() -> List[str]: def example_prompts() -> list[str]:
prompts = [] prompts = []
for filename in _TEST_PROMPTS: for filename in _TEST_PROMPTS:
prompts += _read_prompts(filename) prompts += _read_prompts(filename)
...@@ -197,7 +196,7 @@ class DecoderPromptType(Enum): ...@@ -197,7 +196,7 @@ class DecoderPromptType(Enum):
@pytest.fixture @pytest.fixture
def example_encoder_decoder_prompts( def example_encoder_decoder_prompts(
) -> Dict[DecoderPromptType, List[ExplicitEncoderDecoderPrompt]]: ) -> dict[DecoderPromptType, list[ExplicitEncoderDecoderPrompt]]:
''' '''
Returns an encoder prompt list and a decoder prompt list, wherein each pair Returns an encoder prompt list and a decoder prompt list, wherein each pair
of same-index entries in both lists corresponds to an (encoder prompt, of same-index entries in both lists corresponds to an (encoder prompt,
...@@ -229,7 +228,7 @@ def example_encoder_decoder_prompts( ...@@ -229,7 +228,7 @@ def example_encoder_decoder_prompts(
@pytest.fixture @pytest.fixture
def example_long_prompts() -> List[str]: def example_long_prompts() -> list[str]:
prompts = [] prompts = []
for filename in _LONG_PROMPTS: for filename in _LONG_PROMPTS:
prompts += _read_prompts(filename) prompts += _read_prompts(filename)
...@@ -273,11 +272,11 @@ class HfRunner: ...@@ -273,11 +272,11 @@ class HfRunner:
model_name: str, model_name: str,
dtype: str = "half", dtype: str = "half",
*, *,
model_kwargs: Optional[Dict[str, Any]] = None, model_kwargs: Optional[dict[str, Any]] = None,
is_sentence_transformer: bool = False, is_sentence_transformer: bool = False,
is_cross_encoder: bool = False, is_cross_encoder: bool = False,
skip_tokenizer_init: bool = False, skip_tokenizer_init: bool = False,
auto_cls: Type[_BaseAutoModelClass] = AutoModelForCausalLM, auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
postprocess_inputs: Callable[..., BatchEncoding] = identity, postprocess_inputs: Callable[..., BatchEncoding] = identity,
) -> None: ) -> None:
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype] torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
...@@ -334,11 +333,11 @@ class HfRunner: ...@@ -334,11 +333,11 @@ class HfRunner:
def get_inputs( def get_inputs(
self, self,
prompts: List[str], prompts: list[str],
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
) -> List[BatchEncoding]: ) -> list[BatchEncoding]:
if images is not None: if images is not None:
assert len(prompts) == len(images) assert len(prompts) == len(images)
...@@ -348,9 +347,9 @@ class HfRunner: ...@@ -348,9 +347,9 @@ class HfRunner:
if audios is not None: if audios is not None:
assert len(prompts) == len(audios) assert len(prompts) == len(audios)
all_inputs: List[BatchEncoding] = [] all_inputs: list[BatchEncoding] = []
for i, prompt in enumerate(prompts): for i, prompt in enumerate(prompts):
processor_kwargs: Dict[str, Any] = { processor_kwargs: dict[str, Any] = {
"text": prompt, "text": prompt,
"return_tensors": "pt", "return_tensors": "pt",
} }
...@@ -370,7 +369,7 @@ class HfRunner: ...@@ -370,7 +369,7 @@ class HfRunner:
return all_inputs return all_inputs
def classify(self, prompts: List[str]) -> List[str]: def classify(self, prompts: list[str]) -> list[str]:
# output is final logits # output is final logits
all_inputs = self.get_inputs(prompts) all_inputs = self.get_inputs(prompts)
outputs = [] outputs = []
...@@ -383,18 +382,18 @@ class HfRunner: ...@@ -383,18 +382,18 @@ class HfRunner:
def generate( def generate(
self, self,
prompts: List[str], prompts: list[str],
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[Tuple[List[List[int]], List[str]]]: ) -> list[tuple[list[list[int]], list[str]]]:
all_inputs = self.get_inputs(prompts, all_inputs = self.get_inputs(prompts,
images=images, images=images,
videos=videos, videos=videos,
audios=audios) audios=audios)
outputs: List[Tuple[List[List[int]], List[str]]] = [] outputs: list[tuple[list[list[int]], list[str]]] = []
for inputs in all_inputs: for inputs in all_inputs:
output_ids = self.model.generate( output_ids = self.model.generate(
**self.wrap_device(inputs, device=self.model.device.type), **self.wrap_device(inputs, device=self.model.device.type),
...@@ -412,13 +411,13 @@ class HfRunner: ...@@ -412,13 +411,13 @@ class HfRunner:
def generate_greedy( def generate_greedy(
self, self,
prompts: List[str], prompts: list[str],
max_tokens: int, max_tokens: int,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[Tuple[List[int], str]]: ) -> list[tuple[list[int], str]]:
outputs = self.generate(prompts, outputs = self.generate(prompts,
do_sample=False, do_sample=False,
max_new_tokens=max_tokens, max_new_tokens=max_tokens,
...@@ -432,10 +431,10 @@ class HfRunner: ...@@ -432,10 +431,10 @@ class HfRunner:
def generate_beam_search( def generate_beam_search(
self, self,
prompts: List[str], prompts: list[str],
beam_width: int, beam_width: int,
max_tokens: int, max_tokens: int,
) -> List[Tuple[List[List[int]], List[str]]]: ) -> list[tuple[list[list[int]], list[str]]]:
outputs = self.generate(prompts, outputs = self.generate(prompts,
do_sample=False, do_sample=False,
max_new_tokens=max_tokens, max_new_tokens=max_tokens,
...@@ -453,19 +452,19 @@ class HfRunner: ...@@ -453,19 +452,19 @@ class HfRunner:
def generate_greedy_logprobs( def generate_greedy_logprobs(
self, self,
prompts: List[str], prompts: list[str],
max_tokens: int, max_tokens: int,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[List[torch.Tensor]]: ) -> list[list[torch.Tensor]]:
all_inputs = self.get_inputs(prompts, all_inputs = self.get_inputs(prompts,
images=images, images=images,
videos=videos, videos=videos,
audios=audios) audios=audios)
all_logprobs: List[List[torch.Tensor]] = [] all_logprobs: list[list[torch.Tensor]] = []
for inputs in all_inputs: for inputs in all_inputs:
output = self.model.generate( output = self.model.generate(
**self.wrap_device(inputs, device=self.model.device.type), **self.wrap_device(inputs, device=self.model.device.type),
...@@ -483,11 +482,11 @@ class HfRunner: ...@@ -483,11 +482,11 @@ class HfRunner:
def _hidden_states_to_seq_logprobs( def _hidden_states_to_seq_logprobs(
self, self,
hidden_states: Tuple[Tuple[torch.Tensor, ...], ...], hidden_states: tuple[tuple[torch.Tensor, ...], ...],
) -> List[torch.Tensor]: ) -> list[torch.Tensor]:
output_embeddings = self.model.get_output_embeddings() output_embeddings = self.model.get_output_embeddings()
seq_logprobs: List[torch.Tensor] = [] seq_logprobs: list[torch.Tensor] = []
for _, hidden_state in enumerate(hidden_states): for _, hidden_state in enumerate(hidden_states):
last_hidden_states = hidden_state[-1][0] last_hidden_states = hidden_state[-1][0]
logits = torch.matmul( logits = torch.matmul(
...@@ -503,14 +502,14 @@ class HfRunner: ...@@ -503,14 +502,14 @@ class HfRunner:
def _hidden_states_to_logprobs( def _hidden_states_to_logprobs(
self, self,
hidden_states: Tuple[Tuple[torch.Tensor, ...], ...], hidden_states: tuple[tuple[torch.Tensor, ...], ...],
num_logprobs: int, num_logprobs: int,
) -> Tuple[List[Dict[int, float]], int]: ) -> tuple[list[dict[int, float]], int]:
seq_logprobs = self._hidden_states_to_seq_logprobs(hidden_states) seq_logprobs = self._hidden_states_to_seq_logprobs(hidden_states)
output_len = len(hidden_states) output_len = len(hidden_states)
# convert to dict # convert to dict
seq_logprobs_lst: List[Dict[int, float]] = [] seq_logprobs_lst: list[dict[int, float]] = []
for tok_idx, tok_logprobs in enumerate(seq_logprobs): for tok_idx, tok_logprobs in enumerate(seq_logprobs):
# drop prompt logprobs # drop prompt logprobs
if tok_idx == 0: if tok_idx == 0:
...@@ -530,22 +529,22 @@ class HfRunner: ...@@ -530,22 +529,22 @@ class HfRunner:
def generate_greedy_logprobs_limit( def generate_greedy_logprobs_limit(
self, self,
prompts: List[str], prompts: list[str],
max_tokens: int, max_tokens: int,
num_logprobs: int, num_logprobs: int,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[TokensTextLogprobs]: ) -> list[TokensTextLogprobs]:
all_inputs = self.get_inputs(prompts, all_inputs = self.get_inputs(prompts,
images=images, images=images,
videos=videos, videos=videos,
audios=audios) audios=audios)
all_logprobs: List[List[Dict[int, float]]] = [] all_logprobs: list[list[dict[int, float]]] = []
all_output_ids: List[List[int]] = [] all_output_ids: list[list[int]] = []
all_output_strs: List[str] = [] all_output_strs: list[str] = []
for inputs in all_inputs: for inputs in all_inputs:
output = self.model.generate( output = self.model.generate(
...@@ -577,23 +576,23 @@ class HfRunner: ...@@ -577,23 +576,23 @@ class HfRunner:
def generate_encoder_decoder_greedy_logprobs_limit( def generate_encoder_decoder_greedy_logprobs_limit(
self, self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]], encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
max_tokens: int, max_tokens: int,
num_logprobs: int, num_logprobs: int,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[TokensTextLogprobs]: ) -> list[TokensTextLogprobs]:
''' '''
Greedy logprobs generation for vLLM encoder/decoder models Greedy logprobs generation for vLLM encoder/decoder models
''' '''
all_logprobs: List[List[Dict[int, float]]] = [] all_logprobs: list[list[dict[int, float]]] = []
all_output_ids: List[List[int]] = [] all_output_ids: list[list[int]] = []
all_output_strs: List[str] = [] all_output_strs: list[str] = []
for i, (encoder_prompt, decoder_prompt) in enumerate( for i, (encoder_prompt, decoder_prompt) in enumerate(
to_enc_dec_tuple_list(encoder_decoder_prompts)): to_enc_dec_tuple_list(encoder_decoder_prompts)):
processor_kwargs: Dict[str, Any] = { processor_kwargs: dict[str, Any] = {
"text": encoder_prompt, "text": encoder_prompt,
"return_tensors": "pt", "return_tensors": "pt",
} }
...@@ -641,10 +640,10 @@ class HfRunner: ...@@ -641,10 +640,10 @@ class HfRunner:
return [(output_ids, output_str, output_logprobs) return [(output_ids, output_str, output_logprobs)
for output_ids, output_str, output_logprobs in outputs] for output_ids, output_str, output_logprobs in outputs]
def encode(self, prompts: List[str]) -> List[List[torch.Tensor]]: def encode(self, prompts: list[str]) -> list[list[torch.Tensor]]:
return self.model.encode(prompts) return self.model.encode(prompts)
def predict(self, prompts: List[List[str]]) -> torch.Tensor: def predict(self, prompts: list[list[str]]) -> torch.Tensor:
return self.model.predict(prompts, convert_to_tensor=True) return self.model.predict(prompts, convert_to_tensor=True)
def __enter__(self): def __enter__(self):
...@@ -699,11 +698,11 @@ class VllmRunner: ...@@ -699,11 +698,11 @@ class VllmRunner:
def get_inputs( def get_inputs(
self, self,
prompts: List[str], prompts: list[str],
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
) -> List[TextPrompt]: ) -> list[TextPrompt]:
if images is not None: if images is not None:
assert len(prompts) == len(images) assert len(prompts) == len(images)
...@@ -733,13 +732,13 @@ class VllmRunner: ...@@ -733,13 +732,13 @@ class VllmRunner:
def generate( def generate(
self, self,
prompts: List[str], prompts: list[str],
sampling_params: SamplingParams, sampling_params: SamplingParams,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[Tuple[List[List[int]], List[str]]]: ) -> list[tuple[list[list[int]], list[str]]]:
inputs = self.get_inputs(prompts, inputs = self.get_inputs(prompts,
images=images, images=images,
videos=videos, videos=videos,
...@@ -749,12 +748,12 @@ class VllmRunner: ...@@ -749,12 +748,12 @@ class VllmRunner:
sampling_params=sampling_params, sampling_params=sampling_params,
**kwargs) **kwargs)
outputs: List[Tuple[List[List[int]], List[str]]] = [] outputs: list[tuple[list[list[int]], list[str]]] = []
for req_output in req_outputs: for req_output in req_outputs:
prompt_str = req_output.prompt prompt_str = req_output.prompt
prompt_ids = req_output.prompt_token_ids prompt_ids = req_output.prompt_token_ids
req_sample_output_ids: List[List[int]] = [] req_sample_output_ids: list[list[int]] = []
req_sample_output_strs: List[str] = [] req_sample_output_strs: list[str] = []
for sample in req_output.outputs: for sample in req_output.outputs:
output_str = sample.text output_str = sample.text
output_ids = list(sample.token_ids) output_ids = list(sample.token_ids)
...@@ -765,9 +764,9 @@ class VllmRunner: ...@@ -765,9 +764,9 @@ class VllmRunner:
@staticmethod @staticmethod
def _final_steps_generate_w_logprobs( def _final_steps_generate_w_logprobs(
req_outputs: List[RequestOutput], req_outputs: list[RequestOutput],
) -> List[TokensTextLogprobsPromptLogprobs]: ) -> list[TokensTextLogprobsPromptLogprobs]:
outputs: List[TokensTextLogprobsPromptLogprobs] = [] outputs: list[TokensTextLogprobsPromptLogprobs] = []
for req_output in req_outputs: for req_output in req_outputs:
assert len(req_output.outputs) > 0 assert len(req_output.outputs) > 0
for sample in req_output.outputs: for sample in req_output.outputs:
...@@ -780,14 +779,14 @@ class VllmRunner: ...@@ -780,14 +779,14 @@ class VllmRunner:
def generate_w_logprobs( def generate_w_logprobs(
self, self,
prompts: List[str], prompts: list[str],
sampling_params: SamplingParams, sampling_params: SamplingParams,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
**kwargs: Any, **kwargs: Any,
) -> Union[List[TokensTextLogprobs], ) -> Union[list[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]: list[TokensTextLogprobsPromptLogprobs]]:
inputs = self.get_inputs(prompts, inputs = self.get_inputs(prompts,
images=images, images=images,
videos=videos, videos=videos,
...@@ -806,10 +805,10 @@ class VllmRunner: ...@@ -806,10 +805,10 @@ class VllmRunner:
def generate_encoder_decoder_w_logprobs( def generate_encoder_decoder_w_logprobs(
self, self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]], encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
sampling_params: SamplingParams, sampling_params: SamplingParams,
) -> Union[List[TokensTextLogprobs], ) -> Union[list[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]: list[TokensTextLogprobsPromptLogprobs]]:
''' '''
Logprobs generation for vLLM encoder/decoder models Logprobs generation for vLLM encoder/decoder models
''' '''
...@@ -826,13 +825,13 @@ class VllmRunner: ...@@ -826,13 +825,13 @@ class VllmRunner:
def generate_greedy( def generate_greedy(
self, self,
prompts: List[str], prompts: list[str],
max_tokens: int, max_tokens: int,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
**kwargs: Any, **kwargs: Any,
) -> List[Tuple[List[int], str]]: ) -> list[tuple[list[int], str]]:
greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens) greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
outputs = self.generate(prompts, outputs = self.generate(prompts,
greedy_params, greedy_params,
...@@ -845,18 +844,18 @@ class VllmRunner: ...@@ -845,18 +844,18 @@ class VllmRunner:
def generate_greedy_logprobs( def generate_greedy_logprobs(
self, self,
prompts: List[str], prompts: list[str],
max_tokens: int, max_tokens: int,
num_logprobs: int, num_logprobs: int,
num_prompt_logprobs: Optional[int] = None, num_prompt_logprobs: Optional[int] = None,
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
stop_token_ids: Optional[List[int]] = None, stop_token_ids: Optional[list[int]] = None,
stop: Optional[List[str]] = None, stop: Optional[list[str]] = None,
**kwargs: Any, **kwargs: Any,
) -> Union[List[TokensTextLogprobs], ) -> Union[list[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]: list[TokensTextLogprobsPromptLogprobs]]:
greedy_logprobs_params = SamplingParams( greedy_logprobs_params = SamplingParams(
temperature=0.0, temperature=0.0,
max_tokens=max_tokens, max_tokens=max_tokens,
...@@ -874,12 +873,12 @@ class VllmRunner: ...@@ -874,12 +873,12 @@ class VllmRunner:
def generate_encoder_decoder_greedy_logprobs( def generate_encoder_decoder_greedy_logprobs(
self, self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]], encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
max_tokens: int, max_tokens: int,
num_logprobs: int, num_logprobs: int,
num_prompt_logprobs: Optional[int] = None, num_prompt_logprobs: Optional[int] = None,
) -> Union[List[TokensTextLogprobs], ) -> Union[list[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]: list[TokensTextLogprobsPromptLogprobs]]:
greedy_logprobs_params = SamplingParams( greedy_logprobs_params = SamplingParams(
temperature=0.0, temperature=0.0,
max_tokens=max_tokens, max_tokens=max_tokens,
...@@ -895,10 +894,10 @@ class VllmRunner: ...@@ -895,10 +894,10 @@ class VllmRunner:
def generate_beam_search( def generate_beam_search(
self, self,
prompts: Union[List[str], List[List[int]]], prompts: Union[list[str], list[list[int]]],
beam_width: int, beam_width: int,
max_tokens: int, max_tokens: int,
) -> List[Tuple[List[List[int]], List[str]]]: ) -> list[tuple[list[list[int]], list[str]]]:
if is_list_of(prompts, str, check="all"): if is_list_of(prompts, str, check="all"):
prompts = [TextPrompt(prompt=prompt) for prompt in prompts] prompts = [TextPrompt(prompt=prompt) for prompt in prompts]
else: else:
...@@ -915,17 +914,17 @@ class VllmRunner: ...@@ -915,17 +914,17 @@ class VllmRunner:
returned_outputs.append((token_ids, texts)) returned_outputs.append((token_ids, texts))
return returned_outputs return returned_outputs
def classify(self, prompts: List[str]) -> List[List[float]]: def classify(self, prompts: list[str]) -> list[list[float]]:
req_outputs = self.model.classify(prompts) req_outputs = self.model.classify(prompts)
return [req_output.outputs.probs for req_output in req_outputs] return [req_output.outputs.probs for req_output in req_outputs]
def encode( def encode(
self, self,
prompts: List[str], prompts: list[str],
images: Optional[PromptImageInput] = None, images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None, videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None, audios: Optional[PromptAudioInput] = None,
) -> List[List[float]]: ) -> list[list[float]]:
inputs = self.get_inputs(prompts, inputs = self.get_inputs(prompts,
images=images, images=images,
videos=videos, videos=videos,
...@@ -936,9 +935,9 @@ class VllmRunner: ...@@ -936,9 +935,9 @@ class VllmRunner:
def score( def score(
self, self,
text_1: Union[str, List[str]], text_1: Union[str, list[str]],
text_2: Union[str, List[str]], text_2: Union[str, list[str]],
) -> List[float]: ) -> list[float]:
req_outputs = self.model.score(text_1, text_2) req_outputs = self.model.score(text_1, text_2)
return [req_output.outputs.score for req_output in req_outputs] return [req_output.outputs.score for req_output in req_outputs]
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import Callable, Iterable, Optional from collections.abc import Iterable
from typing import Callable, Optional
import pytest import pytest
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import random import random
from typing import List
import pytest import pytest
...@@ -137,9 +136,9 @@ def prep_prompts(batch_size: int): ...@@ -137,9 +136,9 @@ def prep_prompts(batch_size: int):
The prompt is just under 10k tokens; sliding window is 4k The prompt is just under 10k tokens; sliding window is 4k
so the answer is outside sliding window, but should still be correct. so the answer is outside sliding window, but should still be correct.
""" """
prompts: List[str] = [] prompts: list[str] = []
answer: List[int] = [] answer: list[int] = []
indices: List[int] = [] indices: list[int] = []
random.seed(1) random.seed(1)
for _ in range(batch_size): for _ in range(batch_size):
idx = random.randint(30, 90) idx = random.randint(30, 90)
...@@ -158,7 +157,7 @@ def prep_prompts(batch_size: int): ...@@ -158,7 +157,7 @@ def prep_prompts(batch_size: int):
return prompts, answer, indices return prompts, answer, indices
def check_answers(indices: List[int], answer: List[int], outputs: List[str]): def check_answers(indices: list[int], answer: list[int], outputs: list[str]):
answer2 = [int(text[0:2].strip()) for text in outputs] answer2 = [int(text[0:2].strip()) for text in outputs]
print(list(zip(indices, zip(answer, answer2)))) print(list(zip(indices, zip(answer, answer2))))
numok = 0 numok = 0
...@@ -170,7 +169,7 @@ def check_answers(indices: List[int], answer: List[int], outputs: List[str]): ...@@ -170,7 +169,7 @@ def check_answers(indices: List[int], answer: List[int], outputs: List[str]):
assert frac_ok > 0.7 assert frac_ok > 0.7
def check_window(prompts: List[str]): def check_window(prompts: list[str]):
def inner(llm: LLM): def inner(llm: LLM):
sliding_window = llm.llm_engine.model_config.get_sliding_window() sliding_window = llm.llm_engine.model_config.get_sliding_window()
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest import pytest
from vllm.core.block.block_table import BlockTable from vllm.core.block.block_table import BlockTable
...@@ -32,7 +30,7 @@ def test_allocate_naive(block_size: int, sequence_len: int): ...@@ -32,7 +30,7 @@ def test_allocate_naive(block_size: int, sequence_len: int):
token_ids = list(range(sequence_len)) token_ids = list(range(sequence_len))
num_blocks_per_alloc = len(list(chunk_list(token_ids, block_size))) num_blocks_per_alloc = len(list(chunk_list(token_ids, block_size)))
block_tables: List[BlockTable] = [] block_tables: list[BlockTable] = []
for i in range(5): for i in range(5):
assert allocator.get_num_free_blocks( assert allocator.get_num_free_blocks(
device=Device.GPU) == num_gpu_blocks - i * num_blocks_per_alloc device=Device.GPU) == num_gpu_blocks - i * num_blocks_per_alloc
...@@ -77,7 +75,7 @@ def test_allocate_prefix_caching(block_size: int, sequence_len: int): ...@@ -77,7 +75,7 @@ def test_allocate_prefix_caching(block_size: int, sequence_len: int):
num_immutable_blocks_per_alloc = len( num_immutable_blocks_per_alloc = len(
chunked_tokens) - num_mutable_blocks_per_alloc chunked_tokens) - num_mutable_blocks_per_alloc
block_tables: List[BlockTable] = [] block_tables: list[BlockTable] = []
for alloc_i in range(1, 6): for alloc_i in range(1, 6):
block_tables.append( block_tables.append(
...@@ -272,7 +270,7 @@ def test_append_token_ids_correct_content(block_size: int, sequence_len: int, ...@@ -272,7 +270,7 @@ def test_append_token_ids_correct_content(block_size: int, sequence_len: int,
) )
block_table.allocate(token_ids=token_ids, device=Device.GPU) block_table.allocate(token_ids=token_ids, device=Device.GPU)
appended_so_far: List[int] = [] appended_so_far: list[int] = []
for append in chunk_list(token_ids_to_append, append_size): for append in chunk_list(token_ids_to_append, append_size):
block_table.append_token_ids(append) block_table.append_token_ids(append)
appended_so_far.extend(append) appended_so_far.extend(append)
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List, Optional from typing import Optional
import pytest import pytest
...@@ -14,7 +14,7 @@ class TestNaiveBlockAllocator: ...@@ -14,7 +14,7 @@ class TestNaiveBlockAllocator:
def create_allocate_lambda(allocate_type: str, def create_allocate_lambda(allocate_type: str,
allocator: NaiveBlockAllocator, allocator: NaiveBlockAllocator,
prev_block: Optional[Block], prev_block: Optional[Block],
token_ids: List[int]): token_ids: list[int]):
if allocate_type == "immutable": if allocate_type == "immutable":
allocate_block = lambda: allocator.allocate_immutable_block( allocate_block = lambda: allocator.allocate_immutable_block(
prev_block=prev_block, token_ids=token_ids) prev_block=prev_block, token_ids=token_ids)
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import math import math
import random import random
from typing import List, Optional from typing import Optional
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest import pytest
...@@ -123,11 +123,11 @@ class TestPrefixCachingBlock: ...@@ -123,11 +123,11 @@ class TestPrefixCachingBlock:
@staticmethod @staticmethod
def create_chain(block_size: int, def create_chain(block_size: int,
token_ids: List[int], token_ids: list[int],
num_empty_trailing_blocks=0) -> List[PrefixCachingBlock]: num_empty_trailing_blocks=0) -> list[PrefixCachingBlock]:
"""Helper method which creates a chain of blocks. """Helper method which creates a chain of blocks.
""" """
blocks: List[PrefixCachingBlock] = [] blocks: list[PrefixCachingBlock] = []
num_blocks = math.ceil( num_blocks = math.ceil(
len(token_ids) / block_size) + num_empty_trailing_blocks len(token_ids) / block_size) + num_empty_trailing_blocks
...@@ -161,7 +161,7 @@ class TestPrefixCachingBlockAllocator: ...@@ -161,7 +161,7 @@ class TestPrefixCachingBlockAllocator:
@staticmethod @staticmethod
def create_allocate_lambda(allocate_type: str, allocator: BlockAllocator, def create_allocate_lambda(allocate_type: str, allocator: BlockAllocator,
prev_block: Optional[Block], prev_block: Optional[Block],
token_ids: List[int]): token_ids: list[int]):
if allocate_type == "immutable": if allocate_type == "immutable":
allocate_block = lambda: allocator.allocate_immutable_block( allocate_block = lambda: allocator.allocate_immutable_block(
prev_block=prev_block, token_ids=token_ids) prev_block=prev_block, token_ids=token_ids)
...@@ -839,13 +839,13 @@ class TestPrefixCachingBlockAllocator: ...@@ -839,13 +839,13 @@ class TestPrefixCachingBlockAllocator:
@staticmethod @staticmethod
def create_immutable_chain( def create_immutable_chain(
block_size: int, block_size: int,
token_ids: List[int], token_ids: list[int],
allocator: PrefixCachingBlockAllocator, allocator: PrefixCachingBlockAllocator,
extra_hash: Optional[int] = None, extra_hash: Optional[int] = None,
) -> List[PrefixCachingBlock]: ) -> list[PrefixCachingBlock]:
"""Helper method which creates a chain of blocks. """Helper method which creates a chain of blocks.
""" """
blocks: List[Block] = [] blocks: list[Block] = []
num_blocks = math.ceil(len(token_ids) / block_size) num_blocks = math.ceil(len(token_ids) / block_size)
if num_blocks == 0: if num_blocks == 0:
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest # noqa import pytest # noqa
...@@ -46,7 +45,7 @@ def test_simple(): ...@@ -46,7 +45,7 @@ def test_simple():
cache_config.num_cpu_blocks = 8 cache_config.num_cpu_blocks = 8
cache_config.num_gpu_blocks = 8 cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(num_seq_group): for i in range(num_seq_group):
...@@ -93,7 +92,7 @@ def test_chunk(): ...@@ -93,7 +92,7 @@ def test_chunk():
cache_config.num_cpu_blocks = 32 cache_config.num_cpu_blocks = 32
cache_config.num_gpu_blocks = 32 cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(2): for i in range(2):
...@@ -145,7 +144,7 @@ def test_concurrent_chunking(): ...@@ -145,7 +144,7 @@ def test_concurrent_chunking():
cache_config.num_cpu_blocks = 32 cache_config.num_cpu_blocks = 32
cache_config.num_gpu_blocks = 32 cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(2): for i in range(2):
...@@ -226,8 +225,8 @@ def test_short_prompts_jump_long_prompts_in_queue(): ...@@ -226,8 +225,8 @@ def test_short_prompts_jump_long_prompts_in_queue():
cache_config.num_cpu_blocks = 3200 # large KV cache size for large requests cache_config.num_cpu_blocks = 3200 # large KV cache size for large requests
cache_config.num_gpu_blocks = 3200 cache_config.num_gpu_blocks = 3200
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
long_seqs: List[SequenceGroup] = [] long_seqs: list[SequenceGroup] = []
short_seqs: List[SequenceGroup] = [] short_seqs: list[SequenceGroup] = []
# Add 2 large seq groups to scheduler. # Add 2 large seq groups to scheduler.
for i in range(2): for i in range(2):
...@@ -368,7 +367,7 @@ def test_complex(): ...@@ -368,7 +367,7 @@ def test_complex():
cache_config.num_cpu_blocks = 64 cache_config.num_cpu_blocks = 64
cache_config.num_gpu_blocks = 64 cache_config.num_gpu_blocks = 64
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(2): for i in range(2):
...@@ -439,7 +438,7 @@ def test_maximal_decoding(): ...@@ -439,7 +438,7 @@ def test_maximal_decoding():
cache_config.num_cpu_blocks = 8 cache_config.num_cpu_blocks = 8
cache_config.num_gpu_blocks = 8 cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(2): for i in range(2):
...@@ -533,7 +532,7 @@ def test_prompt_limit(): ...@@ -533,7 +532,7 @@ def test_prompt_limit():
cache_config.num_cpu_blocks = 16 cache_config.num_cpu_blocks = 16
cache_config.num_gpu_blocks = 16 cache_config.num_gpu_blocks = 16
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
_, seq_group = create_dummy_prompt("1", _, seq_group = create_dummy_prompt("1",
prompt_length=48, prompt_length=48,
...@@ -565,7 +564,7 @@ def test_prompt_limit_exceed(): ...@@ -565,7 +564,7 @@ def test_prompt_limit_exceed():
cache_config.num_cpu_blocks = 16 cache_config.num_cpu_blocks = 16
cache_config.num_gpu_blocks = 16 cache_config.num_gpu_blocks = 16
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
_, seq_group = create_dummy_prompt("2", _, seq_group = create_dummy_prompt("2",
prompt_length=48, prompt_length=48,
block_size=block_size) block_size=block_size)
...@@ -699,7 +698,7 @@ def test_chunked_prefill_max_seqs(): ...@@ -699,7 +698,7 @@ def test_chunked_prefill_max_seqs():
cache_config.num_cpu_blocks = 128 cache_config.num_cpu_blocks = 128
cache_config.num_gpu_blocks = 128 cache_config.num_gpu_blocks = 128
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
_, seq_group = create_dummy_prompt("1", _, seq_group = create_dummy_prompt("1",
prompt_length=65, prompt_length=65,
...@@ -758,7 +757,7 @@ def test_prefix_caching(): ...@@ -758,7 +757,7 @@ def test_prefix_caching():
cache_config.num_cpu_blocks = 0 cache_config.num_cpu_blocks = 0
cache_config.num_gpu_blocks = 32 cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(2): for i in range(2):
...@@ -800,7 +799,7 @@ def test_prefix_caching_with_concurrent_partial_prefills(): ...@@ -800,7 +799,7 @@ def test_prefix_caching_with_concurrent_partial_prefills():
cache_config.num_cpu_blocks = 0 cache_config.num_cpu_blocks = 0
cache_config.num_gpu_blocks = 32 cache_config.num_gpu_blocks = 32
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(2): for i in range(2):
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
import time import time
from collections import deque from collections import deque
from typing import List, Set, Tuple
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest # noqa import pytest # noqa
...@@ -57,7 +56,7 @@ def test_scheduler_abort_seq_group(): ...@@ -57,7 +56,7 @@ def test_scheduler_abort_seq_group():
# Add multiple seq groups to scheduler. # Add multiple seq groups to scheduler.
num_seq_group = 4 num_seq_group = 4
request_ids: Set[str] = set() request_ids: set[str] = set()
for i in range(num_seq_group): for i in range(num_seq_group):
_, seq_group = create_dummy_prompt(str(i), block_size) _, seq_group = create_dummy_prompt(str(i), block_size)
scheduler.add_seq_group(seq_group) scheduler.add_seq_group(seq_group)
...@@ -83,7 +82,7 @@ def test_scheduler_schedule_simple(): ...@@ -83,7 +82,7 @@ def test_scheduler_schedule_simple():
cache_config.num_cpu_blocks = 8 cache_config.num_cpu_blocks = 8
cache_config.num_gpu_blocks = 8 cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(num_seq_group): for i in range(num_seq_group):
...@@ -221,7 +220,7 @@ def test_scheduler_max_seqs(): ...@@ -221,7 +220,7 @@ def test_scheduler_max_seqs():
cache_config.num_gpu_blocks = 8 cache_config.num_gpu_blocks = 8
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
all_seq_groups: List[SequenceGroup] = [] all_seq_groups: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
for i in range(num_seq_group): for i in range(num_seq_group):
_, seq_group = create_dummy_prompt(str(i), _, seq_group = create_dummy_prompt(str(i),
...@@ -480,7 +479,7 @@ def test_prefill_schedule_max_lora(): ...@@ -480,7 +479,7 @@ def test_prefill_schedule_max_lora():
num_cpu_blocks=64, num_cpu_blocks=64,
num_gpu_blocks=64) num_gpu_blocks=64)
budget = create_token_budget(token_budget=120) budget = create_token_budget(token_budget=120)
curr_loras: Set[int] = set() curr_loras: set[int] = set()
for i in range(2): for i in range(2):
_, seq_group = create_dummy_prompt(str(i), _, seq_group = create_dummy_prompt(str(i),
prompt_length=60, prompt_length=60,
...@@ -651,8 +650,8 @@ def test_schedule_swapped_max_loras(): ...@@ -651,8 +650,8 @@ def test_schedule_swapped_max_loras():
block_size=block_size, block_size=block_size,
num_cpu_blocks=32, num_cpu_blocks=32,
num_gpu_blocks=32) num_gpu_blocks=32)
curr_loras: Set[int] = set() curr_loras: set[int] = set()
blocks_to_swap_out: List[Tuple[int, int]] = [] blocks_to_swap_out: list[tuple[int, int]] = []
for i in range(2): for i in range(2):
_, seq_group = create_dummy_prompt(str(i), _, seq_group = create_dummy_prompt(str(i),
prompt_length=60, prompt_length=60,
...@@ -683,7 +682,7 @@ def test_schedule_swapped_cannot_swap_in(): ...@@ -683,7 +682,7 @@ def test_schedule_swapped_cannot_swap_in():
num_cpu_blocks=32, num_cpu_blocks=32,
num_gpu_blocks=32) num_gpu_blocks=32)
curr_loras = None curr_loras = None
blocks_to_swap_out: List[Tuple[int, int]] = [] blocks_to_swap_out: list[tuple[int, int]] = []
for i in range(2): for i in range(2):
_, seq_group = create_dummy_prompt(str(i), _, seq_group = create_dummy_prompt(str(i),
prompt_length=60, prompt_length=60,
...@@ -714,7 +713,7 @@ def test_infeasible_swap(): ...@@ -714,7 +713,7 @@ def test_infeasible_swap():
num_cpu_blocks=32, num_cpu_blocks=32,
num_gpu_blocks=32) num_gpu_blocks=32)
curr_loras = None curr_loras = None
blocks_to_swap_out: List[Tuple[int, int]] = [] blocks_to_swap_out: list[tuple[int, int]] = []
for i in range(2): for i in range(2):
_, seq_group = create_dummy_prompt(str(i), _, seq_group = create_dummy_prompt(str(i),
prompt_length=60, prompt_length=60,
...@@ -752,7 +751,7 @@ def test_schedule_swapped_blocks_to_copy(): ...@@ -752,7 +751,7 @@ def test_schedule_swapped_blocks_to_copy():
block_size=block_size) block_size=block_size)
scheduler._allocate_and_set_running(seq_group) scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1) append_new_token_seq_group(60, seq_group, 1)
blocks_to_swap_out: List[Tuple[int, int]] = [] blocks_to_swap_out: list[tuple[int, int]] = []
scheduler._swap_out(seq_group, blocks_to_swap_out) scheduler._swap_out(seq_group, blocks_to_swap_out)
scheduler._add_seq_group_to_swapped(seq_group) scheduler._add_seq_group_to_swapped(seq_group)
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest # noqa import pytest # noqa
from vllm.config import CacheConfig, SchedulerConfig from vllm.config import CacheConfig, SchedulerConfig
...@@ -48,7 +46,7 @@ def test_scheduler_schedule_simple_encoder_decoder(): ...@@ -48,7 +46,7 @@ def test_scheduler_schedule_simple_encoder_decoder():
cache_config.num_cpu_blocks = 16 # enc and dec prompts per seq_group cache_config.num_cpu_blocks = 16 # enc and dec prompts per seq_group
cache_config.num_gpu_blocks = 16 # enc and dec prompts per seq_group cache_config.num_gpu_blocks = 16 # enc and dec prompts per seq_group
scheduler = Scheduler(scheduler_config, cache_config, None) scheduler = Scheduler(scheduler_config, cache_config, None)
running: List[SequenceGroup] = [] running: list[SequenceGroup] = []
# Add seq groups to scheduler. # Add seq groups to scheduler.
req_id_list = [] req_id_list = []
......
...@@ -2,9 +2,8 @@ ...@@ -2,9 +2,8 @@
import time import time
from collections import defaultdict from collections import defaultdict
from typing import Any, Dict, List, Optional from collections.abc import Sequence as GenericSequence
from typing import Sequence as GenericSequence from typing import Any, Optional
from typing import Tuple
from vllm import SamplingParams from vllm import SamplingParams
from vllm.core.scheduler import Scheduler, SchedulerOutputs from vllm.core.scheduler import Scheduler, SchedulerOutputs
...@@ -20,10 +19,10 @@ def create_dummy_prompt( ...@@ -20,10 +19,10 @@ def create_dummy_prompt(
block_size: Optional[int] = None, block_size: Optional[int] = None,
lora_request: Optional[LoRARequest] = None, lora_request: Optional[LoRARequest] = None,
best_of: int = 1, best_of: int = 1,
prompt_tokens: Optional[List[int]] = None, prompt_tokens: Optional[list[int]] = None,
min_tokens: int = 0, min_tokens: int = 0,
max_tokens: int = 16, max_tokens: int = 16,
) -> Tuple[Sequence, SequenceGroup]: ) -> tuple[Sequence, SequenceGroup]:
if not block_size: if not block_size:
block_size = prompt_length block_size = prompt_length
...@@ -48,7 +47,7 @@ def create_dummy_prompt( ...@@ -48,7 +47,7 @@ def create_dummy_prompt(
return prompt, seq_group return prompt, seq_group
def create_dummy_lora_sequence(request_id: int, token_ids: List[int], def create_dummy_lora_sequence(request_id: int, token_ids: list[int],
block_size: int, lora_int_id: int) -> Sequence: block_size: int, lora_int_id: int) -> Sequence:
return Sequence(seq_id=request_id, return Sequence(seq_id=request_id,
inputs=token_inputs(token_ids), inputs=token_inputs(token_ids),
...@@ -58,7 +57,7 @@ def create_dummy_lora_sequence(request_id: int, token_ids: List[int], ...@@ -58,7 +57,7 @@ def create_dummy_lora_sequence(request_id: int, token_ids: List[int],
lora_int_id=lora_int_id)) lora_int_id=lora_int_id))
def create_dummy_sequence(request_id: int, token_ids: List[int], def create_dummy_sequence(request_id: int, token_ids: list[int],
block_size: int) -> Sequence: block_size: int) -> Sequence:
return Sequence( return Sequence(
seq_id=request_id, seq_id=request_id,
...@@ -74,7 +73,7 @@ def create_dummy_prompt_encoder_decoder( ...@@ -74,7 +73,7 @@ def create_dummy_prompt_encoder_decoder(
block_size: Optional[int] = None, block_size: Optional[int] = None,
lora_request: Optional[LoRARequest] = None, lora_request: Optional[LoRARequest] = None,
best_of: int = 1, best_of: int = 1,
) -> Tuple[Sequence, Sequence, SequenceGroup]: ) -> tuple[Sequence, Sequence, SequenceGroup]:
if not block_size: if not block_size:
block_size = decoder_prompt_length block_size = decoder_prompt_length
...@@ -125,7 +124,7 @@ def create_seq_group( ...@@ -125,7 +124,7 @@ def create_seq_group(
prompt_token_ids = [0] * seq_prompt_len prompt_token_ids = [0] * seq_prompt_len
seqs: List[Sequence] = [] seqs: list[Sequence] = []
for seq_id_offset, output_len in enumerate(seq_output_lens): for seq_id_offset, output_len in enumerate(seq_output_lens):
seq = Sequence( seq = Sequence(
seq_id=seq_id_start + seq_id_offset, seq_id=seq_id_start + seq_id_offset,
...@@ -241,7 +240,7 @@ class SchedulerProxy: ...@@ -241,7 +240,7 @@ class SchedulerProxy:
def __init__(self, scheduler: Scheduler): def __init__(self, scheduler: Scheduler):
self.scheduler_ = scheduler self.scheduler_ = scheduler
self.call_history: Dict[str, List[Any]] = defaultdict(list) self.call_history: dict[str, list[Any]] = defaultdict(list)
def __getattr__(self, name: str) -> Any: def __getattr__(self, name: str) -> Any:
...@@ -253,6 +252,6 @@ class SchedulerProxy: ...@@ -253,6 +252,6 @@ class SchedulerProxy:
return wrapper return wrapper
def last_schedule_ret( def last_schedule_ret(
self, ) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs, Any]: self, ) -> tuple[list[SequenceGroupMetadata], SchedulerOutputs, Any]:
_, _, ret = self.call_history["schedule"][-1] _, _, ret = self.call_history["schedule"][-1]
return ret return ret
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Literal, NamedTuple, Optional from typing import Literal, NamedTuple, Optional
import pytest import pytest
...@@ -28,8 +28,8 @@ class EPTestOptions(NamedTuple): ...@@ -28,8 +28,8 @@ class EPTestOptions(NamedTuple):
@dataclass @dataclass
class EPTestSettings: class EPTestSettings:
parallel_setups: List[ParallelSetup] parallel_setups: list[ParallelSetup]
distributed_backends: List[str] distributed_backends: list[str]
task: TaskOption task: TaskOption
test_options: EPTestOptions test_options: EPTestOptions
......
...@@ -9,7 +9,7 @@ WARNING: This test runs in both single-node (4 GPUs) and multi-node ...@@ -9,7 +9,7 @@ WARNING: This test runs in both single-node (4 GPUs) and multi-node
import json import json
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Literal, NamedTuple, Optional from typing import Literal, NamedTuple, Optional
import pytest import pytest
...@@ -38,14 +38,14 @@ class PPTestOptions(NamedTuple): ...@@ -38,14 +38,14 @@ class PPTestOptions(NamedTuple):
@dataclass @dataclass
class PPTestSettings: class PPTestSettings:
parallel_setups: List[ParallelSetup] parallel_setups: list[ParallelSetup]
# NOTE: the length of distributed_backends and # NOTE: the length of distributed_backends and
# vllm_major_versions should be the same, and they # vllm_major_versions should be the same, and they
# are first zipped together to iterate over all # are first zipped together to iterate over all
# test settings. # test settings.
distributed_backends: List[str] distributed_backends: list[str]
# vllm major version: "0" for V0, "1" for V1 # vllm major version: "0" for V0, "1" for V1
vllm_major_versions: List[str] vllm_major_versions: list[str]
task: TaskOption task: TaskOption
test_options: PPTestOptions test_options: PPTestOptions
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
import multiprocessing import multiprocessing
import os import os
from typing import Dict, List
import pytest import pytest
import torch import torch
...@@ -20,9 +19,9 @@ from vllm.utils import update_environment_variables ...@@ -20,9 +19,9 @@ from vllm.utils import update_environment_variables
def distributed_run(fn, world_size): def distributed_run(fn, world_size):
number_of_processes = world_size number_of_processes = world_size
processes: List[multiprocessing.Process] = [] processes: list[multiprocessing.Process] = []
for i in range(number_of_processes): for i in range(number_of_processes):
env: Dict[str, str] = {} env: dict[str, str] = {}
env['RANK'] = str(i) env['RANK'] = str(i)
env['LOCAL_RANK'] = str(i) env['LOCAL_RANK'] = str(i)
env['WORLD_SIZE'] = str(number_of_processes) env['WORLD_SIZE'] = str(number_of_processes)
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
import multiprocessing import multiprocessing
import random import random
import time import time
from typing import List
import numpy as np import numpy as np
import torch.distributed as dist import torch.distributed as dist
...@@ -13,7 +12,7 @@ from vllm.distributed.utils import StatelessProcessGroup ...@@ -13,7 +12,7 @@ from vllm.distributed.utils import StatelessProcessGroup
from vllm.utils import get_ip, get_open_port, update_environment_variables from vllm.utils import get_ip, get_open_port, update_environment_variables
def get_arrays(n: int, seed: int = 0) -> List[np.ndarray]: def get_arrays(n: int, seed: int = 0) -> list[np.ndarray]:
np.random.seed(seed) np.random.seed(seed)
sizes = np.random.randint(1, 10_000, n) sizes = np.random.randint(1, 10_000, n)
# on average, each array will have 5k elements # on average, each array will have 5k elements
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
Run `pytest tests/encoder_decoder/test_e2e_correctness.py`. Run `pytest tests/encoder_decoder/test_e2e_correctness.py`.
""" """
from typing import List, Optional, Tuple from typing import Optional
import pytest import pytest
from transformers import AutoModelForSeq2SeqLM from transformers import AutoModelForSeq2SeqLM
...@@ -22,7 +22,7 @@ LIST_ENC_DEC_SUPPORTED_BACKENDS = [ ...@@ -22,7 +22,7 @@ LIST_ENC_DEC_SUPPORTED_BACKENDS = [
def vllm_to_hf_output( def vllm_to_hf_output(
vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]], vllm_output: tuple[list[int], str, Optional[SampleLogprobs]],
decoder_prompt_type: DecoderPromptType, decoder_prompt_type: DecoderPromptType,
): ):
"""Sanitize vllm output to be comparable with hf output.""" """Sanitize vllm output to be comparable with hf output."""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment