"tests/vscode:/vscode.git/clone" did not exist on "aafabaa0d5c87c283b366f81fdce55cf91ae980c"
Unverified Commit 0ae11f78, authored by SangBin Cho, committed by GitHub
Browse files

[Mypy] Part 3 fix typing for nested directories for most of directory (#4161)

parent 34128a69
...@@ -185,6 +185,7 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -185,6 +185,7 @@ class OpenAIServingCompletion(OpenAIServing):
model_name: str, model_name: str,
num_prompts: int, num_prompts: int,
) -> AsyncGenerator[str, None]: ) -> AsyncGenerator[str, None]:
assert request.n is not None
previous_texts = [""] * request.n * num_prompts previous_texts = [""] * request.n * num_prompts
previous_num_tokens = [0] * request.n * num_prompts previous_num_tokens = [0] * request.n * num_prompts
has_echoed = [False] * request.n * num_prompts has_echoed = [False] * request.n * num_prompts
...@@ -202,6 +203,7 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -202,6 +203,7 @@ class OpenAIServingCompletion(OpenAIServing):
# TODO(simon): optimize the performance by avoiding full # TODO(simon): optimize the performance by avoiding full
# text O(n^2) sending. # text O(n^2) sending.
assert request.max_tokens is not None
if request.echo and request.max_tokens == 0: if request.echo and request.max_tokens == 0:
# only return the prompt # only return the prompt
delta_text = res.prompt delta_text = res.prompt
...@@ -279,7 +281,7 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -279,7 +281,7 @@ class OpenAIServingCompletion(OpenAIServing):
created_time: int, created_time: int,
model_name: str, model_name: str,
) -> CompletionResponse: ) -> CompletionResponse:
choices = [] choices: List[CompletionResponseChoice] = []
num_prompt_tokens = 0 num_prompt_tokens = 0
num_generated_tokens = 0 num_generated_tokens = 0
for final_res in final_res_batch: for final_res in final_res_batch:
...@@ -289,6 +291,7 @@ class OpenAIServingCompletion(OpenAIServing): ...@@ -289,6 +291,7 @@ class OpenAIServingCompletion(OpenAIServing):
prompt_text = final_res.prompt prompt_text = final_res.prompt
for output in final_res.outputs: for output in final_res.outputs:
assert request.max_tokens is not None
if request.echo and request.max_tokens == 0: if request.echo and request.max_tokens == 0:
token_ids = prompt_token_ids token_ids = prompt_token_ids
top_logprobs = prompt_logprobs top_logprobs = prompt_logprobs
......
...@@ -4,7 +4,9 @@ from dataclasses import dataclass ...@@ -4,7 +4,9 @@ from dataclasses import dataclass
from http import HTTPStatus from http import HTTPStatus
from typing import Dict, List, Optional, Tuple, Union from typing import Dict, List, Optional, Tuple, Union
from pydantic import conint from pydantic import Field
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
from typing_extensions import Annotated
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
...@@ -45,7 +47,8 @@ class OpenAIServing: ...@@ -45,7 +47,8 @@ class OpenAIServing:
] ]
self.max_model_len = 0 self.max_model_len = 0
self.tokenizer = None # Lazy initialized
self.tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
try: try:
event_loop = asyncio.get_running_loop() event_loop = asyncio.get_running_loop()
...@@ -92,7 +95,7 @@ class OpenAIServing: ...@@ -92,7 +95,7 @@ class OpenAIServing:
def _create_logprobs( def _create_logprobs(
self, self,
token_ids: List[int], token_ids: List[int],
top_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None, top_logprobs: List[Optional[Dict[int, Logprob]]],
num_output_top_logprobs: Optional[int] = None, num_output_top_logprobs: Optional[int] = None,
initial_text_offset: int = 0, initial_text_offset: int = 0,
) -> LogProbs: ) -> LogProbs:
...@@ -108,6 +111,7 @@ class OpenAIServing: ...@@ -108,6 +111,7 @@ class OpenAIServing:
token = self.tokenizer.decode(token_id) token = self.tokenizer.decode(token_id)
logprobs.tokens.append(token) logprobs.tokens.append(token)
logprobs.token_logprobs.append(None) logprobs.token_logprobs.append(None)
assert logprobs.top_logprobs is not None
logprobs.top_logprobs.append(None) logprobs.top_logprobs.append(None)
else: else:
token_logprob = step_top_logprobs[token_id].logprob token_logprob = step_top_logprobs[token_id].logprob
...@@ -116,6 +120,7 @@ class OpenAIServing: ...@@ -116,6 +120,7 @@ class OpenAIServing:
logprobs.token_logprobs.append(token_logprob) logprobs.token_logprobs.append(token_logprob)
if num_output_top_logprobs: if num_output_top_logprobs:
assert logprobs.top_logprobs is not None
logprobs.top_logprobs.append({ logprobs.top_logprobs.append({
# Convert float("-inf") to the # Convert float("-inf") to the
# JSON-serializable float that OpenAI uses # JSON-serializable float that OpenAI uses
...@@ -155,9 +160,9 @@ class OpenAIServing: ...@@ -155,9 +160,9 @@ class OpenAIServing:
async def _check_model(self, request) -> Optional[ErrorResponse]: async def _check_model(self, request) -> Optional[ErrorResponse]:
if request.model in self.served_model_names: if request.model in self.served_model_names:
return return None
if request.model in [lora.lora_name for lora in self.lora_requests]: if request.model in [lora.lora_name for lora in self.lora_requests]:
return return None
return self.create_error_response( return self.create_error_response(
message=f"The model `{request.model}` does not exist.", message=f"The model `{request.model}` does not exist.",
err_type="NotFoundError", err_type="NotFoundError",
...@@ -165,7 +170,7 @@ class OpenAIServing: ...@@ -165,7 +170,7 @@ class OpenAIServing:
def _maybe_get_lora(self, request) -> Optional[LoRARequest]: def _maybe_get_lora(self, request) -> Optional[LoRARequest]:
if request.model in self.served_model_names: if request.model in self.served_model_names:
return return None
for lora in self.lora_requests: for lora in self.lora_requests:
if request.model == lora.lora_name: if request.model == lora.lora_name:
return lora return lora
...@@ -177,7 +182,7 @@ class OpenAIServing: ...@@ -177,7 +182,7 @@ class OpenAIServing:
request: Union[ChatCompletionRequest, CompletionRequest], request: Union[ChatCompletionRequest, CompletionRequest],
prompt: Optional[str] = None, prompt: Optional[str] = None,
prompt_ids: Optional[List[int]] = None, prompt_ids: Optional[List[int]] = None,
truncate_prompt_tokens: Optional[conint(ge=1)] = None truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
) -> Tuple[List[int], str]: ) -> Tuple[List[int], str]:
if not (prompt or prompt_ids): if not (prompt or prompt_ids):
raise ValueError("Either prompt or prompt_ids should be provided.") raise ValueError("Either prompt or prompt_ids should be provided.")
......
...@@ -33,7 +33,7 @@ class LoRALayerWeights: ...@@ -33,7 +33,7 @@ class LoRALayerWeights:
def optimize(self) -> "LoRALayerWeights": def optimize(self) -> "LoRALayerWeights":
"""Optimize the LoRA by merging the scaling into lora_b.""" """Optimize the LoRA by merging the scaling into lora_b."""
if self.scaling == 1: if self.scaling == 1:
return return self
self.lora_b *= self.scaling self.lora_b *= self.scaling
self.scaling = 1 self.scaling = 1
return self return self
......
...@@ -29,8 +29,8 @@ def _multi_split_sample( ...@@ -29,8 +29,8 @@ def _multi_split_sample(
sampled_tokens_size: Tuple[int, int], sampled_tokens_size: Tuple[int, int],
sampled_logprobs_size: Tuple[int, int], sampled_logprobs_size: Tuple[int, int],
sample_indices: torch.Tensor, sample_indices: torch.Tensor,
logprobs: torch.Tensor,
*, *,
logprobs: Optional[torch.Tensor] = None,
modify_greedy_probs: bool = False, modify_greedy_probs: bool = False,
save_logprobs: bool = False, save_logprobs: bool = False,
): ):
...@@ -167,6 +167,7 @@ def sample( ...@@ -167,6 +167,7 @@ def sample(
sampled_logprobs_size = (0, 0) sampled_logprobs_size = (0, 0)
logprobs = probs logprobs = probs
assert logprobs is not None
if _save_modified_probs: if _save_modified_probs:
sampled_modified_probs_size = sampled_tokens_size sampled_modified_probs_size = sampled_tokens_size
else: else:
......
...@@ -108,7 +108,8 @@ class RotaryEmbedding(nn.Module): ...@@ -108,7 +108,8 @@ class RotaryEmbedding(nn.Module):
query_pass = query[..., self.rotary_dim:] query_pass = query[..., self.rotary_dim:]
key_pass = key[..., self.rotary_dim:] key_pass = key[..., self.rotary_dim:]
self.cos_sin_cache = self.cos_sin_cache.to(positions.device) self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(
positions.device)
cos_sin = self.cos_sin_cache[torch.add(positions, offsets) cos_sin = self.cos_sin_cache[torch.add(positions, offsets)
if offsets is not None else positions] if offsets is not None else positions]
cos, sin = cos_sin.chunk(2, dim=-1) cos, sin = cos_sin.chunk(2, dim=-1)
......
...@@ -222,13 +222,15 @@ class JAISConfig(PretrainedConfig): ...@@ -222,13 +222,15 @@ class JAISConfig(PretrainedConfig):
f"got {alibi_scaling_type}") f"got {alibi_scaling_type}")
if (alibi_scaling_factor is not None if (alibi_scaling_factor is not None
and not isinstance(alibi_scaling_factor, float) and not isinstance(alibi_scaling_factor, float)
or alibi_scaling_factor <= 1.0): or (alibi_scaling_factor is not None
and alibi_scaling_factor <= 1.0)):
raise ValueError( raise ValueError(
f"`alibi_scaling`'s factor field must be a float > 1.0," f"`alibi_scaling`'s factor field must be a float > 1.0,"
f"got {alibi_scaling_factor}") f"got {alibi_scaling_factor}")
if (alibi_dynamic_scaling is not None if (alibi_dynamic_scaling is not None
and not isinstance(alibi_dynamic_scaling, int) and not isinstance(alibi_dynamic_scaling, int)
or alibi_dynamic_scaling <= 1): or (alibi_dynamic_scaling is not None
and alibi_dynamic_scaling <= 1)):
raise ValueError( raise ValueError(
f"`alibi_scaling`'s `train_seq_len` field must be an" f"`alibi_scaling`'s `train_seq_len` field must be an"
f"integer > 1, got {alibi_dynamic_scaling}") f"integer > 1, got {alibi_dynamic_scaling}")
...@@ -11,7 +11,7 @@ if ray: ...@@ -11,7 +11,7 @@ if ray:
from vllm.transformers_utils.tokenizer_group.ray_tokenizer_group import ( from vllm.transformers_utils.tokenizer_group.ray_tokenizer_group import (
RayTokenizerGroupPool) RayTokenizerGroupPool)
else: else:
RayTokenizerGroupPool = None RayTokenizerGroupPool = None # type: ignore
def get_tokenizer_group(tokenizer_pool_config: Optional[TokenizerPoolConfig], def get_tokenizer_group(tokenizer_pool_config: Optional[TokenizerPoolConfig],
......
...@@ -89,6 +89,7 @@ class RayTokenizerGroupPool(BaseTokenizerGroup): ...@@ -89,6 +89,7 @@ class RayTokenizerGroupPool(BaseTokenizerGroup):
This is blocking. This is blocking.
""" """
self._ensure_queue_initialized() self._ensure_queue_initialized()
assert self._idle_actors is not None
if self._idle_actors.empty(): if self._idle_actors.empty():
raise RuntimeError("No idle actors available.") raise RuntimeError("No idle actors available.")
...@@ -120,6 +121,7 @@ class RayTokenizerGroupPool(BaseTokenizerGroup): ...@@ -120,6 +121,7 @@ class RayTokenizerGroupPool(BaseTokenizerGroup):
This is non-blocking. This is non-blocking.
""" """
self._ensure_queue_initialized() self._ensure_queue_initialized()
assert self._idle_actors is not None
actor = await self._idle_actors.get() actor = await self._idle_actors.get()
try: try:
......
...@@ -114,9 +114,9 @@ class BaichuanTokenizer(PreTrainedTokenizer): ...@@ -114,9 +114,9 @@ class BaichuanTokenizer(PreTrainedTokenizer):
token = self.sp_model.IdToPiece(index) token = self.sp_model.IdToPiece(index)
return token return token
def convert_tokens_to_string(self, tokens): def convert_tokens_to_string(self, tokens: List[str]):
"""Converts a sequence of tokens (string) in a single string.""" """Converts a sequence of tokens (string) in a single string."""
current_sub_tokens = [] current_sub_tokens: List[str] = []
out_string = "" out_string = ""
prev_is_special = False prev_is_special = False
for i, token in enumerate(tokens): for i, token in enumerate(tokens):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment