Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b25cfab9
Unverified
Commit b25cfab9, authored Jan 11, 2025 by Roger Wang; committed by GitHub on Jan 12, 2025
Browse files
[V1] Avoid sending text prompt to core engine (#11963)
Signed-off-by: Roger Wang <ywang@roblox.com>
parent
4b657d32
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 8 additions and 2 deletions
+8
-2
vllm/v1/engine/__init__.py
vllm/v1/engine/__init__.py
+2
-2
vllm/v1/engine/core_client.py
vllm/v1/engine/core_client.py
+6
-0
No files found.
vllm/v1/engine/__init__.py
View file @
b25cfab9
...
@@ -19,8 +19,8 @@ class EngineCoreRequest:
...
@@ -19,8 +19,8 @@ class EngineCoreRequest:
# due to circular imports and typing we have in data.py
# due to circular imports and typing we have in data.py
request_id
:
str
request_id
:
str
- # NOTE(Nick): I don't think we need to pass prompt here since it should
- # always be tokenized?
+ # NOTE(ywang96): original text prompt is needed when a request is added to
+ # Detokenizer, but set to None when it is added to EngineCoreClient.
prompt
:
Optional
[
str
]
prompt
:
Optional
[
str
]
prompt_token_ids
:
List
[
int
]
prompt_token_ids
:
List
[
int
]
mm_inputs
:
Optional
[
List
[
Optional
[
"MultiModalKwargs"
]]]
mm_inputs
:
Optional
[
List
[
Optional
[
"MultiModalKwargs"
]]]
...
...
vllm/v1/engine/core_client.py
View file @
b25cfab9
...
@@ -219,6 +219,9 @@ class SyncMPClient(MPClient):
...
@@ -219,6 +219,9 @@ class SyncMPClient(MPClient):
self
.
input_socket
.
send_multipart
(
msg
,
copy
=
False
)
self
.
input_socket
.
send_multipart
(
msg
,
copy
=
False
)
def
add_request
(
self
,
request
:
EngineCoreRequest
)
->
None
:
def
add_request
(
self
,
request
:
EngineCoreRequest
)
->
None
:
# NOTE: text prompt is not needed in the core engine as it has been
# tokenized.
request
.
prompt
=
None
self
.
_send_input
(
EngineCoreRequestType
.
ADD
,
request
)
self
.
_send_input
(
EngineCoreRequestType
.
ADD
,
request
)
def
abort_requests
(
self
,
request_ids
:
List
[
str
])
->
None
:
def
abort_requests
(
self
,
request_ids
:
List
[
str
])
->
None
:
...
@@ -257,6 +260,9 @@ class AsyncMPClient(MPClient):
...
@@ -257,6 +260,9 @@ class AsyncMPClient(MPClient):
await
self
.
input_socket
.
send_multipart
(
msg
,
copy
=
False
)
await
self
.
input_socket
.
send_multipart
(
msg
,
copy
=
False
)
async
def
add_request_async
(
self
,
request
:
EngineCoreRequest
)
->
None
:
async
def
add_request_async
(
self
,
request
:
EngineCoreRequest
)
->
None
:
# NOTE: text prompt is not needed in the core engine as it has been
# tokenized.
request
.
prompt
=
None
await
self
.
_send_input
(
EngineCoreRequestType
.
ADD
,
request
)
await
self
.
_send_input
(
EngineCoreRequestType
.
ADD
,
request
)
async
def
abort_requests_async
(
self
,
request_ids
:
List
[
str
])
->
None
:
async
def
abort_requests_async
(
self
,
request_ids
:
List
[
str
])
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment