# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...extras.packages import is_gradio_available
from ..common import is_multimodal
from .chatbot import create_chat_box


if is_gradio_available():
    import gradio as gr


if TYPE_CHECKING:
    from gradio.components import Component

    from ..engine import Engine


def create_infer_tab(engine: "Engine") -> dict[str, "Component"]:
    """Build the inference tab and register its event handlers.

    Components are created in this order: a row holding the backend, dtype
    and extra-argument inputs; a row holding the load and unload buttons; a
    non-interactive info box; and finally the chat box produced by
    ``create_chat_box`` (created with ``visible=False``).

    Args:
        engine: Supplies ``manager`` (base elements and element lookup by id)
            and ``chatter`` (``load_model``, ``unload_model`` and ``loaded``).

    Returns:
        A mapping from element name to component. It contains the controls
        created here plus every entry of the element dict returned by
        ``create_chat_box``.
    """
    input_elems = engine.manager.get_base_elems()

    with gr.Row():
        infer_backend = gr.Dropdown(choices=["huggingface", "vllm", "sglang"], value="huggingface")
        infer_dtype = gr.Dropdown(choices=["auto", "float16", "bfloat16", "float32"], value="auto")
        extra_args = gr.Textbox(value='{"vllm_enforce_eager": true}')

    with gr.Row():
        load_btn = gr.Button()
        unload_btn = gr.Button()

    info_box = gr.Textbox(show_label=False, interactive=False)

    # The inference options are sent to load/unload together with the base elements.
    input_elems.update({infer_backend, infer_dtype, extra_args})
    elem_dict: dict[str, "Component"] = {
        "infer_backend": infer_backend,
        "infer_dtype": infer_dtype,
        "extra_args": extra_args,
        "load_btn": load_btn,
        "unload_btn": unload_btn,
        "info_box": info_box,
    }

    chatbot, messages, chat_elems = create_chat_box(engine, visible=False)
    elem_dict.update(chat_elems)

    # After loading, show the chat box only if the chatter reports a loaded model.
    load_btn.click(engine.chatter.load_model, input_elems, [info_box]).then(
        lambda: gr.Column(visible=engine.chatter.loaded), outputs=[chat_elems["chat_box"]]
    )

    # After unloading, clear the chatbot and message history first, then
    # refresh the chat box visibility from the chatter's loaded state.
    unload_btn.click(engine.chatter.unload_model, input_elems, [info_box]).then(
        lambda: ([], []), outputs=[chatbot, messages]
    ).then(lambda: gr.Column(visible=engine.chatter.loaded), outputs=[chat_elems["chat_box"]])

    # Toggle the multimodal box whenever the selected model name changes.
    model_name_elem = engine.manager.get_elem_by_id("top.model_name")
    model_name_elem.change(
        lambda model_name: gr.Column(visible=is_multimodal(model_name)),
        [model_name_elem],
        [chat_elems["mm_box"]],
    )

    return elem_dict