"tests/kernels/untest_mamba_ssm.py" did not exist on "622b7ab955186f37879208d7a30e9faf985be220"
streamlit_openai_chatbot_webserver.py 10.3 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5
6
7
8
9
10
11
12
13
"""
vLLM Chat Assistant - A Streamlit Web Interface

A streamlined chat interface that quickly integrates
with vLLM API server.

Features:
- Multiple chat sessions management
- Streaming response display
- Configurable API endpoint
- Real-time chat history
14
- Reasoning Display: Optional thinking process visualization 
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

Requirements:
    pip install streamlit openai

Usage:
    # Start the app with default settings
    streamlit run streamlit_openai_chatbot_webserver.py

    # Start with custom vLLM API endpoint
    VLLM_API_BASE="http://your-server:8000/v1" \
        streamlit run streamlit_openai_chatbot_webserver.py

    # Enable debug mode
    streamlit run streamlit_openai_chatbot_webserver.py \
        --logger.level=debug
"""
31

32
33
34
35
36
37
38
import os
from datetime import datetime

import streamlit as st
from openai import OpenAI

# Get command line arguments from environment variables
39
40
openai_api_key = os.getenv("VLLM_API_KEY", "EMPTY")
openai_api_base = os.getenv("VLLM_API_BASE", "http://localhost:8000/v1")
41
42
43
44
45
46
47
48
49
50
51
52
53
54

# Initialize session states for managing chat sessions
if "sessions" not in st.session_state:
    st.session_state.sessions = {}

if "current_session" not in st.session_state:
    st.session_state.current_session = None

if "messages" not in st.session_state:
    st.session_state.messages = []

if "active_session" not in st.session_state:
    st.session_state.active_session = None

55
56
57
58
# Add new session state for reasoning
if "show_reasoning" not in st.session_state:
    st.session_state.show_reasoning = {}

59
60
61
62
63
64
# Initialize session state for API base URL
if "api_base_url" not in st.session_state:
    st.session_state.api_base_url = openai_api_base


def create_new_chat_session():
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    """Create a new chat session with timestamp as unique identifier.

    This function initializes a new chat session by:
    1. Generating a timestamp-based session ID
    2. Creating an empty message list for the new session
    3. Setting the new session as both current and active session
    4. Resetting the messages list for the new session

    Returns:
        None

    Session State Updates:
        - sessions: Adds new empty message list with timestamp key
        - current_session: Sets to new session ID
        - active_session: Sets to new session ID
        - messages: Resets to empty list
    """
82
83
84
85
86
87
88
89
    session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.sessions[session_id] = []
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = []


def switch_to_chat_session(session_id):
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
    """Switch the active chat context to a different session.

    Args:
        session_id (str): The timestamp ID of the session to switch to

    This function handles chat session switching by:
    1. Setting the specified session as current
    2. Updating the active session marker
    3. Loading the messages history from the specified session

    Session State Updates:
        - current_session: Updated to specified session_id
        - active_session: Updated to specified session_id
        - messages: Loaded from sessions[session_id]
    """
105
106
107
108
109
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = st.session_state.sessions[session_id]


110
111
def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None):
    """Generate and stream LLM response with optional reasoning process.
112
113

    Args:
114
115
116
117
118
        messages (list): List of conversation message dicts with 'role' and 'content'
        model (str): The model identifier to use for generation
        reason (bool): Whether to enable and display reasoning process
        content_ph (streamlit.empty): Placeholder for streaming response content
        reasoning_ph (streamlit.empty): Placeholder for streaming reasoning process
119
120

    Returns:
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
        tuple: (str, str)
            - First string contains the complete response text
            - Second string contains the complete reasoning text (if enabled)

    Features:
        - Streams both reasoning and response text in real-time
        - Handles model API errors gracefully
        - Supports live updating of thinking process
        - Maintains separate content and reasoning displays

    Raises:
        Exception: Wrapped in error message if API call fails

    Note:
        The function uses streamlit placeholders for live updates.
        When reason=True, the reasoning process appears above the response.
137
    """
138
139
140
141
142
143
144
145
    full_text = ""
    think_text = ""
    live_think = None
    # Build request parameters
    params = {"model": model, "messages": messages, "stream": True}
    if reason:
        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}

146
    try:
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
        response = client.chat.completions.create(**params)
        if isinstance(response, str):
            if content_ph:
                content_ph.markdown(response)
            return response, ""

        # Prepare reasoning expander above content
        if reason and reasoning_ph:
            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
            live_think = exp.empty()

        # Stream chunks
        for chunk in response:
            delta = chunk.choices[0].delta
            # Stream reasoning first
            if reason and hasattr(delta, "reasoning_content") and live_think:
                rc = delta.reasoning_content
                if rc:
                    think_text += rc
                    live_think.markdown(think_text + "▌")
            # Then stream content
            if hasattr(delta, "content") and delta.content and content_ph:
                full_text += delta.content
                content_ph.markdown(full_text + "▌")

        # Finalize displays: reasoning remains above, content below
        if reason and live_think:
            live_think.markdown(think_text)
        if content_ph:
            content_ph.markdown(full_text)

        return full_text, think_text
179
180
    except Exception as e:
        st.error(f"Error details: {str(e)}")
181
        return f"Error: {str(e)}", ""
182
183
184
185


# Sidebar - API Settings first
st.sidebar.title("API Settings")
186
187
188
new_api_base = st.sidebar.text_input(
    "API Base URL:", value=st.session_state.api_base_url
)
189
190
191
192
193
194
195
196
197
198
199
if new_api_base != st.session_state.api_base_url:
    st.session_state.api_base_url = new_api_base
    st.rerun()

st.sidebar.divider()

# Sidebar - Session Management
st.sidebar.title("Chat Sessions")
if st.sidebar.button("New Session"):
    create_new_chat_session()

200

201
202
203
204
# Display all sessions in reverse chronological order
for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
    # Mark the active session with a pinned button
    if session_id == st.session_state.active_session:
205
206
207
208
209
210
211
        st.sidebar.button(
            f"📍 {session_id}",
            key=session_id,
            type="primary",
            on_click=switch_to_chat_session,
            args=(session_id,),
        )
212
    else:
213
214
215
216
217
218
        st.sidebar.button(
            f"Session {session_id}",
            key=session_id,
            on_click=switch_to_chat_session,
            args=(session_id,),
        )
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

# Main interface
st.title("vLLM Chat Assistant")

# Initialize OpenAI client with API settings
client = OpenAI(api_key=openai_api_key, base_url=st.session_state.api_base_url)

# Get and display current model id
models = client.models.list()
model = models.data[0].id
st.markdown(f"**Model**: {model}")

# Initialize first session if none exists
if st.session_state.current_session is None:
    create_new_chat_session()
    st.session_state.active_session = st.session_state.current_session

236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# Update the chat history display section
for idx, msg in enumerate(st.session_state.messages):
    # Render user messages normally
    if msg["role"] == "user":
        with st.chat_message("user"):
            st.write(msg["content"])
    # Render assistant messages with reasoning above
    else:
        # If reasoning exists for this assistant message, show it above the content
        if idx in st.session_state.show_reasoning:
            with st.expander("💭 Thinking Process", expanded=False):
                st.markdown(st.session_state.show_reasoning[idx])
        with st.chat_message("assistant"):
            st.write(msg["content"])


# Setup & Cache reasoning support check
@st.cache_data(show_spinner=False)
def server_supports_reasoning():
    """Check if the current model supports reasoning capability.

    Returns:
        bool: True if the model supports reasoning, False otherwise
    """
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": "Hi"}],
        stream=False,
    )
    return hasattr(resp.choices[0].message, "reasoning_content") and bool(
        resp.choices[0].message.reasoning_content
    )
268

269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284

# Check support
supports_reasoning = server_supports_reasoning()

# Add reasoning toggle in sidebar if supported
reason = False  # Default to False
if supports_reasoning:
    reason = st.sidebar.checkbox("Enable Reasoning", value=False)
else:
    st.sidebar.markdown(
        "<span style='color:gray;'>Reasoning unavailable for this model.</span>",
        unsafe_allow_html=True,
    )
    # reason remains False

# Update the input handling section
285
if prompt := st.chat_input("Type your message here..."):
286
    # Save and display user message
287
    st.session_state.messages.append({"role": "user", "content": prompt})
288
289
290
    st.session_state.sessions[st.session_state.current_session] = (
        st.session_state.messages
    )
291
292
293
    with st.chat_message("user"):
        st.write(prompt)

294
295
    # Prepare LLM messages
    msgs = [
296
297
        {"role": m["role"], "content": m["content"]} for m in st.session_state.messages
    ]
298

299
    # Stream assistant response
300
    with st.chat_message("assistant"):
301
302
303
304
305
306
307
308
309
310
311
        # Placeholders: reasoning above, content below
        reason_ph = st.empty()
        content_ph = st.empty()
        full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)
        # Determine index for this new assistant message
        message_index = len(st.session_state.messages)
        # Save assistant reply
        st.session_state.messages.append({"role": "assistant", "content": full})
        # Persist reasoning in session state if any
        if reason and think:
            st.session_state.show_reasoning[message_index] = think