Unverified commit b9e96b17 authored by Simon Mo, committed by GitHub

fix python 3.8 syntax (#2716)

parent 923797fe
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,8 +4,21 @@
 #################### BASE BUILD IMAGE ####################
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev
 
+# Set the DEBIAN_FRONTEND variable to noninteractive to avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata for US Central Time (build running in us-central-1 but this really doesn't matter.)
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Chicago' | debconf-set-selections
+
+# We install an older version of python here for testing to make sure vllm works with older versions of Python.
+# For the actual openai compatible server, we will use the latest version of Python.
 RUN apt-get update -y \
-    && apt-get install -y python3-pip git
+    && apt-get install -y software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update -y \
+    && apt-get install -y python3.8 python3.8-dev python3.8-venv python3-pip git \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
...
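Context for the Dockerfile change above: the type aliases fixed in this commit used PEP 585 built-in generics (`list[int]`, `dict[int, float]`, `tuple[...]`), which are only subscriptable on Python 3.9+. Installing Python 3.8 from the deadsnakes PPA in the dev image makes the test build fail loudly on that syntax. A minimal sketch of the failure this image is meant to catch (the version guard is only illustrative; the alias name mirrors the one in the patch):

    import sys

    # On Python 3.8, subscripting the built-in `list` fails at import time with
    # "TypeError: 'type' object is not subscriptable"; typing.List works on 3.8+.
    if sys.version_info < (3, 9):
        try:
            TypeTokenIDs = list[int]  # PEP 585 syntax, Python 3.9+ only
        except TypeError as e:
            print(f"Python 3.8 failure reproduced: {e}")

    from typing import List

    TypeTokenIDs = List[int]  # portable spelling, works on Python 3.8 and later
    print(TypeTokenIDs)       # typing.List[int]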
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -1,7 +1,7 @@
 import asyncio
 import time
 from fastapi import Request
-from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional
+from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional, Dict, Tuple
 from vllm.logger import init_logger
 from vllm.utils import random_uuid
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -19,8 +19,8 @@ from vllm.entrypoints.openai.serving_engine import OpenAIServing
 logger = init_logger(__name__)
 
-TypeTokenIDs = list[int]
-TypeTopLogProbs = List[Optional[dict[int, float]]]
+TypeTokenIDs = List[int]
+TypeTopLogProbs = List[Optional[Dict[int, float]]]
 TypeCreateLogProbsFn = Callable[
     [TypeTokenIDs, TypeTopLogProbs, Optional[int], int], LogProbs]
@@ -29,7 +29,7 @@ async def completion_stream_generator(
         request: CompletionRequest,
         raw_request: Request,
         on_abort,
-        result_generator: AsyncIterator[tuple[int, RequestOutput]],
+        result_generator: AsyncIterator[Tuple[int, RequestOutput]],
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,
         created_time: int,
@@ -126,7 +126,7 @@ async def completion_stream_generator(
     yield "data: [DONE]\n\n"
 
 
-def parse_prompt_format(prompt) -> tuple[bool, list]:
+def parse_prompt_format(prompt) -> Tuple[bool, list]:
     # get the prompt, openai supports the following
     # "a string, array of strings, array of tokens, or array of token arrays."
     prompt_is_tokens = False
@@ -151,7 +151,7 @@ def parse_prompt_format(prompt) -> tuple[bool, list]:
 def request_output_to_completion_response(
-        final_res_batch: list[RequestOutput],
+        final_res_batch: List[RequestOutput],
         request: CompletionRequest,
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,
@@ -302,7 +302,7 @@ class OpenAIServingCompletion(OpenAIServing):
         except ValueError as e:
             return self.create_error_response(str(e))
 
-        result_generator: AsyncIterator[tuple[
+        result_generator: AsyncIterator[Tuple[
             int, RequestOutput]] = merge_async_iterators(*generators)
 
         # Similar to the OpenAI API, when n != best_of, we do not stream the
...
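One subtlety worth noting (an editorial aside, not part of the patch): `from __future__ import annotations` (PEP 563) would not have been enough here, because it only defers evaluation of annotations. Module-level aliases such as `TypeTokenIDs = list[int]` are ordinary assignments, executed eagerly at import, which is why the patch rewrites them with `typing.List`/`Dict`/`Tuple`. A small sketch contrasting the two cases (`head` is a hypothetical function for illustration):

    from __future__ import annotations  # defers evaluation of annotations only

    from typing import List


    def head(xs: list[int]) -> int:
        # Fine even on Python 3.8: under PEP 563 this annotation is stored as
        # a string and never evaluated at runtime.
        return xs[0]


    # An ordinary assignment is evaluated eagerly, so the commented-out line
    # below would still raise TypeError on 3.8 despite the __future__ import.
    # TypeTokenIDs = list[int]
    TypeTokenIDs = List[int]  # the portable spelling used by the patch

    print(head([1, 2, 3]), TypeTokenIDs)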