Commit 035a6cb0 (unverified), authored by Cyrus Leung, committed by GitHub
Browse files

[Misc] Update code for encoder-decoder models (#33900)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent a32cb49b
......@@ -1098,7 +1098,7 @@ class MultiModalEncDecInputs(MultiModalInputs):
Note: Even text-only encoder-decoder models are currently implemented
as multi-modal models for convenience.
-(Example: https://github.com/neuralmagic/bart-plugin)
+(Example: https://github.com/vllm-project/bart-plugin)
"""
encoder_prompt_token_ids: list[int]
......
......@@ -185,7 +185,13 @@ class Scheduler(SchedulerInterface):
# NOTE: Text-only encoder-decoder models are implemented as
# multi-modal models for convenience
-# Example: https://github.com/neuralmagic/bart-plugin
+# Example: https://github.com/vllm-project/bart-plugin
+if self.is_encoder_decoder:
+assert mm_budget and len(mm_budget.mm_max_toks_per_item) <= 1, (
+"Encoder-decoder models are expected to implement the "
+"multimodal interface with at most one modality."
+)
self.max_num_encoder_input_tokens = (
mm_budget.encoder_compute_budget if mm_budget else 0
)
......@@ -200,7 +206,7 @@ class Scheduler(SchedulerInterface):
# TODO (NickLucche): Generalize to models with variable-length encoder inputs.
self._num_encoder_max_input_tokens = (
mm_budget.mm_max_toks_per_item[mm_budget.get_modality_with_max_tokens()]
-if mm_budget
+if mm_budget and mm_budget.mm_max_toks_per_item
else 0
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment