Unverified commit 6cd4ae8a, authored by Reid, committed by GitHub
Browse files

[Frontend] Add tqdm_leave_pbar to control progress bar visibility (#19357)


Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
parent c016047e
...@@ -281,7 +281,7 @@ class LLM: ...@@ -281,7 +281,7 @@ class LLM:
sampling_params: Optional[Union[SamplingParams, sampling_params: Optional[Union[SamplingParams,
Sequence[SamplingParams]]] = None, Sequence[SamplingParams]]] = None,
*, *,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -297,7 +297,7 @@ class LLM: ...@@ -297,7 +297,7 @@ class LLM:
sampling_params: Optional[Union[SamplingParams, sampling_params: Optional[Union[SamplingParams,
list[SamplingParams]]] = None, list[SamplingParams]]] = None,
prompt_token_ids: Optional[list[int]] = None, prompt_token_ids: Optional[list[int]] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -313,7 +313,7 @@ class LLM: ...@@ -313,7 +313,7 @@ class LLM:
sampling_params: Optional[Union[SamplingParams, sampling_params: Optional[Union[SamplingParams,
list[SamplingParams]]] = None, list[SamplingParams]]] = None,
prompt_token_ids: Optional[list[list[int]]] = None, prompt_token_ids: Optional[list[list[int]]] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -330,7 +330,7 @@ class LLM: ...@@ -330,7 +330,7 @@ class LLM:
list[SamplingParams]]] = None, list[SamplingParams]]] = None,
*, *,
prompt_token_ids: list[int], prompt_token_ids: list[int],
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -347,7 +347,7 @@ class LLM: ...@@ -347,7 +347,7 @@ class LLM:
list[SamplingParams]]] = None, list[SamplingParams]]] = None,
*, *,
prompt_token_ids: list[list[int]], prompt_token_ids: list[list[int]],
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -362,7 +362,7 @@ class LLM: ...@@ -362,7 +362,7 @@ class LLM:
prompts: None, prompts: None,
sampling_params: None, sampling_params: None,
prompt_token_ids: Union[list[int], list[list[int]]], prompt_token_ids: Union[list[int], list[list[int]]],
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -382,7 +382,7 @@ class LLM: ...@@ -382,7 +382,7 @@ class LLM:
sampling_params: Optional[Union[SamplingParams, sampling_params: Optional[Union[SamplingParams,
Sequence[SamplingParams]]] = None, Sequence[SamplingParams]]] = None,
prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None, prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
guided_options_request: Optional[Union[LLMGuidedOptions, guided_options_request: Optional[Union[LLMGuidedOptions,
...@@ -404,7 +404,10 @@ class LLM: ...@@ -404,7 +404,10 @@ class LLM:
When it is a single value, it is applied to every prompt. When it is a single value, it is applied to every prompt.
When it is a list, the list must have the same length as the When it is a list, the list must have the same length as the
prompts and it is paired one by one with the prompt. prompts and it is paired one by one with the prompt.
use_tqdm: Whether to use tqdm to display the progress bar. use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
If `False`, no progress bar is created.
lora_request: LoRA request to use for generation, if any. lora_request: LoRA request to use for generation, if any.
prompt_adapter_request: Prompt Adapter request to use for prompt_adapter_request: Prompt Adapter request to use for
generation, if any. generation, if any.
...@@ -678,7 +681,7 @@ class LLM: ...@@ -678,7 +681,7 @@ class LLM:
list[list[ChatCompletionMessageParam]]], list[list[ChatCompletionMessageParam]]],
sampling_params: Optional[Union[SamplingParams, sampling_params: Optional[Union[SamplingParams,
list[SamplingParams]]] = None, list[SamplingParams]]] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[LoRARequest] = None, lora_request: Optional[LoRARequest] = None,
chat_template: Optional[str] = None, chat_template: Optional[str] = None,
chat_template_content_format: ChatTemplateContentFormatOption = "auto", chat_template_content_format: ChatTemplateContentFormatOption = "auto",
...@@ -709,7 +712,10 @@ class LLM: ...@@ -709,7 +712,10 @@ class LLM:
is a single value, it is applied to every prompt. When it is a single value, it is applied to every prompt. When it
is a list, the list must have the same length as the is a list, the list must have the same length as the
prompts and it is paired one by one with the prompt. prompts and it is paired one by one with the prompt.
use_tqdm: Whether to use tqdm to display the progress bar. use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
If `False`, no progress bar is created.
lora_request: LoRA request to use for generation, if any. lora_request: LoRA request to use for generation, if any.
chat_template: The template to use for structuring the chat. chat_template: The template to use for structuring the chat.
If not provided, the model's default chat template will be used. If not provided, the model's default chat template will be used.
...@@ -823,7 +829,7 @@ class LLM: ...@@ -823,7 +829,7 @@ class LLM:
Sequence[PoolingParams]]] = None, Sequence[PoolingParams]]] = None,
*, *,
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -838,7 +844,7 @@ class LLM: ...@@ -838,7 +844,7 @@ class LLM:
Sequence[PoolingParams]]] = None, Sequence[PoolingParams]]] = None,
prompt_token_ids: Optional[list[int]] = None, prompt_token_ids: Optional[list[int]] = None,
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -853,7 +859,7 @@ class LLM: ...@@ -853,7 +859,7 @@ class LLM:
Sequence[PoolingParams]]] = None, Sequence[PoolingParams]]] = None,
prompt_token_ids: Optional[list[list[int]]] = None, prompt_token_ids: Optional[list[list[int]]] = None,
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -869,7 +875,7 @@ class LLM: ...@@ -869,7 +875,7 @@ class LLM:
*, *,
prompt_token_ids: list[int], prompt_token_ids: list[int],
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -885,7 +891,7 @@ class LLM: ...@@ -885,7 +891,7 @@ class LLM:
*, *,
prompt_token_ids: list[list[int]], prompt_token_ids: list[list[int]],
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -899,7 +905,7 @@ class LLM: ...@@ -899,7 +905,7 @@ class LLM:
pooling_params: None, pooling_params: None,
prompt_token_ids: Union[list[int], list[list[int]]], prompt_token_ids: Union[list[int], list[list[int]]],
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -918,7 +924,7 @@ class LLM: ...@@ -918,7 +924,7 @@ class LLM:
Sequence[PoolingParams]]] = None, Sequence[PoolingParams]]] = None,
prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None, prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None,
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[PoolingRequestOutput]: ) -> list[PoolingRequestOutput]:
...@@ -935,7 +941,10 @@ class LLM: ...@@ -935,7 +941,10 @@ class LLM:
for more details about the format of each prompts. for more details about the format of each prompts.
pooling_params: The pooling parameters for pooling. If None, we pooling_params: The pooling parameters for pooling. If None, we
use the default pooling parameters. use the default pooling parameters.
use_tqdm: Whether to use tqdm to display the progress bar. use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
If `False`, no progress bar is created.
lora_request: LoRA request to use for generation, if any. lora_request: LoRA request to use for generation, if any.
prompt_adapter_request: Prompt Adapter request to use for prompt_adapter_request: Prompt Adapter request to use for
generation, if any. generation, if any.
...@@ -1005,7 +1014,7 @@ class LLM: ...@@ -1005,7 +1014,7 @@ class LLM:
/, /,
*, *,
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
pooling_params: Optional[Union[PoolingParams, pooling_params: Optional[Union[PoolingParams,
Sequence[PoolingParams]]] = None, Sequence[PoolingParams]]] = None,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
...@@ -1024,7 +1033,10 @@ class LLM: ...@@ -1024,7 +1033,10 @@ class LLM:
for more details about the format of each prompts. for more details about the format of each prompts.
pooling_params: The pooling parameters for pooling. If None, we pooling_params: The pooling parameters for pooling. If None, we
use the default pooling parameters. use the default pooling parameters.
use_tqdm: Whether to use tqdm to display the progress bar. use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
If `False`, no progress bar is created.
lora_request: LoRA request to use for generation, if any. lora_request: LoRA request to use for generation, if any.
prompt_adapter_request: Prompt Adapter request to use for prompt_adapter_request: Prompt Adapter request to use for
generation, if any. generation, if any.
...@@ -1051,7 +1063,7 @@ class LLM: ...@@ -1051,7 +1063,7 @@ class LLM:
prompts: Union[PromptType, Sequence[PromptType]], prompts: Union[PromptType, Sequence[PromptType]],
/, /,
*, *,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[ClassificationRequestOutput]: ) -> list[ClassificationRequestOutput]:
...@@ -1066,7 +1078,10 @@ class LLM: ...@@ -1066,7 +1078,10 @@ class LLM:
prompts: The prompts to the LLM. You may pass a sequence of prompts prompts: The prompts to the LLM. You may pass a sequence of prompts
for batch inference. See [PromptType][vllm.inputs.PromptType] for batch inference. See [PromptType][vllm.inputs.PromptType]
for more details about the format of each prompts. for more details about the format of each prompts.
use_tqdm: Whether to use tqdm to display the progress bar. use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
If `False`, no progress bar is created.
lora_request: LoRA request to use for generation, if any. lora_request: LoRA request to use for generation, if any.
prompt_adapter_request: Prompt Adapter request to use for prompt_adapter_request: Prompt Adapter request to use for
generation, if any. generation, if any.
...@@ -1092,7 +1107,7 @@ class LLM: ...@@ -1092,7 +1107,7 @@ class LLM:
text_1: list[Union[str, TextPrompt, TokensPrompt]], text_1: list[Union[str, TextPrompt, TokensPrompt]],
text_2: list[Union[str, TextPrompt, TokensPrompt]], text_2: list[Union[str, TextPrompt, TokensPrompt]],
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[ScoringRequestOutput]: ) -> list[ScoringRequestOutput]:
...@@ -1126,7 +1141,7 @@ class LLM: ...@@ -1126,7 +1141,7 @@ class LLM:
text_1: list[str], text_1: list[str],
text_2: list[str], text_2: list[str],
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[ScoringRequestOutput]: ) -> list[ScoringRequestOutput]:
...@@ -1178,7 +1193,7 @@ class LLM: ...@@ -1178,7 +1193,7 @@ class LLM:
/, /,
*, *,
truncate_prompt_tokens: Optional[int] = None, truncate_prompt_tokens: Optional[int] = None,
use_tqdm: bool = True, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None, lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
) -> list[ScoringRequestOutput]: ) -> list[ScoringRequestOutput]:
...@@ -1198,7 +1213,10 @@ class LLM: ...@@ -1198,7 +1213,10 @@ class LLM:
text_2: The texts to pair with the query to form the input text_2: The texts to pair with the query to form the input
to the LLM. See [PromptType][vllm.inputs.PromptType] for to the LLM. See [PromptType][vllm.inputs.PromptType] for
more details about the format of each prompts. more details about the format of each prompts.
use_tqdm: Whether to use tqdm to display the progress bar. use_tqdm: If `True`, shows a tqdm progress bar.
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
it is used to create the progress bar.
If `False`, no progress bar is created.
lora_request: LoRA request to use for generation, if any. lora_request: LoRA request to use for generation, if any.
prompt_adapter_request: Prompt Adapter request to use for prompt_adapter_request: Prompt Adapter request to use for
generation, if any. generation, if any.
...@@ -1379,7 +1397,7 @@ class LLM: ...@@ -1379,7 +1397,7 @@ class LLM:
params: Union[SamplingParams, Sequence[SamplingParams], PoolingParams, params: Union[SamplingParams, Sequence[SamplingParams], PoolingParams,
Sequence[PoolingParams]], Sequence[PoolingParams]],
*, *,
use_tqdm: bool, use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[Sequence[LoRARequest], LoRARequest]], lora_request: Optional[Union[Sequence[LoRARequest], LoRARequest]],
prompt_adapter_request: Optional[PromptAdapterRequest], prompt_adapter_request: Optional[PromptAdapterRequest],
tokenization_kwargs: Optional[dict[str, Any]] = None, tokenization_kwargs: Optional[dict[str, Any]] = None,
...@@ -1417,7 +1435,8 @@ class LLM: ...@@ -1417,7 +1435,8 @@ class LLM:
# Add requests to the engine. # Add requests to the engine.
it = prompts it = prompts
if use_tqdm: if use_tqdm:
it = tqdm(it, desc="Adding requests") tqdm_func = use_tqdm if callable(use_tqdm) else tqdm
it = tqdm_func(it, desc="Adding requests")
for i, prompt in enumerate(it): for i, prompt in enumerate(it):
self._add_request( self._add_request(
...@@ -1474,12 +1493,15 @@ class LLM: ...@@ -1474,12 +1493,15 @@ class LLM:
return params return params
def _run_engine( def _run_engine(
self, *, use_tqdm: bool self,
*,
use_tqdm: Union[bool, Callable[..., tqdm]] = True
) -> list[Union[RequestOutput, PoolingRequestOutput]]: ) -> list[Union[RequestOutput, PoolingRequestOutput]]:
# Initialize tqdm. # Initialize tqdm.
if use_tqdm: if use_tqdm:
num_requests = self.llm_engine.get_num_unfinished_requests() num_requests = self.llm_engine.get_num_unfinished_requests()
pbar = tqdm( tqdm_func = use_tqdm if callable(use_tqdm) else tqdm
pbar = tqdm_func(
total=num_requests, total=num_requests,
desc="Processed prompts", desc="Processed prompts",
dynamic_ncols=True, dynamic_ncols=True,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment