Unverified commit b9e96b17, authored by Simon Mo, committed by GitHub

fix python 3.8 syntax (#2716)

parent 923797fe
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,8 +4,21 @@
 #################### BASE BUILD IMAGE ####################
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev
 
+# Set the DEBIAN_FRONTEND variable to noninteractive to avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata for US Central Time (the build runs in us-central-1, but this really doesn't matter)
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Chicago' | debconf-set-selections
+
+# We install an older version of Python here for testing, to make sure vLLM works with older Python versions.
+# For the actual OpenAI-compatible server, we will use the latest version of Python.
 RUN apt-get update -y \
-    && apt-get install -y python3-pip git
+    && apt-get install -y software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update -y \
+    && apt-get install -y python3.8 python3.8-dev python3.8-venv python3-pip git \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
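
The Python 3.8 toolchain pinned above exists to catch 3.9-only syntax (like the annotations fixed below) before release. As a quick sanity check, one could run a snippet like this inside the image to confirm that update-alternatives selected the expected interpreter (a hypothetical check, not part of this commit):

import sys

# Fail fast if the image's default python3 is not the deadsnakes 3.8
# interpreter wired up by update-alternatives above.
if sys.version_info[:2] != (3, 8):
    raise RuntimeError(f"expected Python 3.8, got {sys.version.split()[0]}")
print(f"OK: running under Python {sys.version.split()[0]}")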
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -1,7 +1,7 @@
 import asyncio
 import time
 from fastapi import Request
-from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional
+from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional, Dict, Tuple
 from vllm.logger import init_logger
 from vllm.utils import random_uuid
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -19,8 +19,8 @@ from vllm.entrypoints.openai.serving_engine import OpenAIServing
 
 logger = init_logger(__name__)
 
-TypeTokenIDs = list[int]
-TypeTopLogProbs = List[Optional[dict[int, float]]]
+TypeTokenIDs = List[int]
+TypeTopLogProbs = List[Optional[Dict[int, float]]]
 TypeCreateLogProbsFn = Callable[
     [TypeTokenIDs, TypeTopLogProbs, Optional[int], int], LogProbs]
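
These alias changes are the heart of the fix: PEP 585 built-in generics such as list[int] and dict[int, float] only became subscriptable in Python 3.9, so evaluating them at import time crashes a 3.8 interpreter, whereas the typing aliases work on both. A minimal sketch of the failure mode (illustrative, not from the commit):

from typing import List

# On Python 3.8, subscripting the builtin raises
# TypeError: 'type' object is not subscriptable.
try:
    TypeTokenIDs = list[int]   # OK on 3.9+, raises on 3.8
except TypeError:
    TypeTokenIDs = List[int]   # the 3.8-compatible spelling this commit adopts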
@@ -29,7 +29,7 @@ async def completion_stream_generator(
     request: CompletionRequest,
     raw_request: Request,
     on_abort,
-    result_generator: AsyncIterator[tuple[int, RequestOutput]],
+    result_generator: AsyncIterator[Tuple[int, RequestOutput]],
     create_logprobs_fn: TypeCreateLogProbsFn,
     request_id: str,
     created_time: int,
@@ -126,7 +126,7 @@ async def completion_stream_generator(
     yield "data: [DONE]\n\n"
 
 
-def parse_prompt_format(prompt) -> tuple[bool, list]:
+def parse_prompt_format(prompt) -> Tuple[bool, list]:
     # get the prompt, openai supports the following
     # "a string, array of strings, array of tokens, or array of token arrays."
     prompt_is_tokens = False
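
Only the signature of parse_prompt_format appears in this hunk. A sketch of the dispatch its comment describes, covering the four accepted OpenAI prompt shapes (an illustration written for this note, not vLLM's actual function body):

from typing import Tuple

def parse_prompt_format(prompt) -> Tuple[bool, list]:
    # Sketch: normalize the four accepted shapes into
    # (prompt_is_tokens, list_of_prompts).
    if isinstance(prompt, str):
        return False, [prompt]            # a single string
    if isinstance(prompt, list) and prompt:
        if isinstance(prompt[0], str):
            return False, prompt          # array of strings
        if isinstance(prompt[0], int):
            return True, [prompt]         # array of tokens
        if isinstance(prompt[0], list):
            return True, prompt           # array of token arrays
    raise ValueError("prompt must be a string, array of strings, "
                     "array of tokens, or array of token arrays")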
@@ -151,7 +151,7 @@ def parse_prompt_format(prompt) -> tuple[bool, list]:
 
 
 def request_output_to_completion_response(
-    final_res_batch: list[RequestOutput],
+    final_res_batch: List[RequestOutput],
     request: CompletionRequest,
     create_logprobs_fn: TypeCreateLogProbsFn,
     request_id: str,
@@ -302,7 +302,7 @@ class OpenAIServingCompletion(OpenAIServing):
         except ValueError as e:
             return self.create_error_response(str(e))
 
-        result_generator: AsyncIterator[tuple[
+        result_generator: AsyncIterator[Tuple[
             int, RequestOutput]] = merge_async_iterators(*generators)
 
         # Similar to the OpenAI API, when n != best_of, we do not stream the
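
The Tuple annotation in the last hunk mirrors what merge_async_iterators produces: a single stream of (prompt_index, RequestOutput) pairs multiplexed from the per-prompt generators. A self-contained sketch of such a helper (an assumption about its shape, not vLLM's implementation):

import asyncio
from typing import AsyncIterator, Tuple, TypeVar

T = TypeVar("T")
_DONE = object()  # sentinel marking an exhausted producer

async def merge_async_iterators(*iterators: AsyncIterator[T]
                                ) -> AsyncIterator[Tuple[int, T]]:
    # Fan every iterator into a shared queue, tagging each item with the
    # index of the iterator it came from.
    queue: "asyncio.Queue" = asyncio.Queue()

    async def produce(i: int, it: AsyncIterator[T]) -> None:
        try:
            async for item in it:
                await queue.put((i, item))
        finally:
            await queue.put(_DONE)

    # Keep task references so the producers are not garbage-collected.
    tasks = [asyncio.create_task(produce(i, it))
             for i, it in enumerate(iterators)]
    remaining = len(tasks)
    while remaining:
        item = await queue.get()
        if item is _DONE:
            remaining -= 1
        else:
            yield item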