norm / vllm · Commits

Commit b9e96b17 (unverified)
Authored Feb 01, 2024 by Simon Mo; committed by GitHub on Feb 01, 2024
Parent: 923797fe

    fix python 3.8 syntax (#2716)

Showing 2 changed files with 21 additions and 8 deletions:

    Dockerfile                                       +14  -1
    vllm/entrypoints/openai/serving_completion.py     +7  -7
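What "python 3.8 syntax" means here: PEP 585 allows subscripting the built-in types directly (list[int], dict[int, float], tuple[bool, list]), but only on Python 3.9 and later; on 3.8 those expressions raise at import time, while the typing-module aliases work on every supported version. A minimal repro of the breakage this commit fixes (illustrative, not part of the diff):

    # Python 3.8:
    #     >>> list[int]
    #     TypeError: 'type' object is not subscriptable
    #
    # The 3.8-compatible spelling uses the typing aliases instead:
    from typing import List

    TypeTokenIDs = List[int]  # works on 3.8 and later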
Dockerfile  (view file @ b9e96b17)

@@ -4,8 +4,21 @@
 #################### BASE BUILD IMAGE ####################
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev
 
+# Set the DEBIAN_FRONTEND variable to noninteractive to avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata for US Central Time (build running in us-central-1 but this really doesn't matter.)
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Chicago' | debconf-set-selections
+
+# We install an older version of python here for testing to make sure vllm works with older versions of Python.
+# For the actual openai compatible server, we will use the latest version of Python.
 RUN apt-get update -y \
-    && apt-get install -y python3-pip git
+    && apt-get install -y software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update -y \
+    && apt-get install -y python3.8 python3.8-dev python3.8-venv python3-pip git \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
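Ubuntu 22.04 ships Python 3.10 as its default python3, so the new Dockerfile pulls 3.8 from the deadsnakes PPA and uses update-alternatives to repoint /usr/bin/python3 at it, making the test build exercise the oldest supported interpreter. A sanity check one might run inside the built image to confirm the switch took effect (hypothetical, not part of the commit):

    import sys

    # After the update-alternatives call above, `python3` should resolve
    # to the 3.8 interpreter rather than the distribution default.
    assert sys.version_info[:2] == (3, 8), sys.version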
vllm/entrypoints/openai/serving_completion.py  (view file @ b9e96b17)

 import asyncio
 import time
 from fastapi import Request
-from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional
+from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional, Dict, Tuple
 from vllm.logger import init_logger
 from vllm.utils import random_uuid
 from vllm.engine.async_llm_engine import AsyncLLMEngine

@@ -19,8 +19,8 @@ from vllm.entrypoints.openai.serving_engine import OpenAIServing
 logger = init_logger(__name__)
 
-TypeTokenIDs = list[int]
-TypeTopLogProbs = List[Optional[dict[int, float]]]
+TypeTokenIDs = List[int]
+TypeTopLogProbs = List[Optional[Dict[int, float]]]
 TypeCreateLogProbsFn = Callable[
     [TypeTokenIDs, TypeTopLogProbs, Optional[int], int], LogProbs]
 

@@ -29,7 +29,7 @@ async def completion_stream_generator(
         request: CompletionRequest,
         raw_request: Request,
         on_abort,
-        result_generator: AsyncIterator[tuple[int, RequestOutput]],
+        result_generator: AsyncIterator[Tuple[int, RequestOutput]],
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,
         created_time: int,

@@ -126,7 +126,7 @@ async def completion_stream_generator(
     yield "data: [DONE]\n\n"
 
 
-def parse_prompt_format(prompt) -> tuple[bool, list]:
+def parse_prompt_format(prompt) -> Tuple[bool, list]:
     # get the prompt, openai supports the following
     # "a string, array of strings, array of tokens, or array of token arrays."
     prompt_is_tokens = False

@@ -151,7 +151,7 @@ def parse_prompt_format(prompt) -> tuple[bool, list]:
 
 def request_output_to_completion_response(
-        final_res_batch: list[RequestOutput],
+        final_res_batch: List[RequestOutput],
         request: CompletionRequest,
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,

@@ -302,7 +302,7 @@ class OpenAIServingCompletion(OpenAIServing):
         except ValueError as e:
             return self.create_error_response(str(e))
 
-        result_generator: AsyncIterator[tuple[
+        result_generator: AsyncIterator[Tuple[
             int, RequestOutput]] = merge_async_iterators(*generators)
 
         # Similar to the OpenAI API, when n != best_of, we do not stream the
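Every hunk above is the same mechanical substitution: list[...] becomes List[...], dict[...] becomes Dict[...], tuple[...] becomes Tuple[...], plus adding Dict and Tuple to the typing import. A self-contained sketch of the corrected pattern (the generator and its names are illustrative stand-ins, not vLLM's API):

    import asyncio
    from typing import AsyncIterator, Dict, List, Optional, Tuple

    # These aliases parse on Python 3.8; the builtin generics they replace
    # (list[int], dict[int, float]) would raise TypeError there.
    TypeTokenIDs = List[int]
    TypeTopLogProbs = List[Optional[Dict[int, float]]]


    async def numbered_outputs() -> AsyncIterator[Tuple[int, str]]:
        # Yields (index, text) pairs, standing in for the (int, RequestOutput)
        # pairs the real result_generator produces.
        for i, text in enumerate(["hello", "world"]):
            yield i, text


    async def main() -> None:
        async for i, text in numbered_outputs():
            print(i, text)


    asyncio.run(main())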