"tests/vscode:/vscode.git/clone" did not exist on "7693c8eabf88f72d5db14d376539782b34b09127"
streamlit_openai_chatbot_webserver.py 5.71 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""
vLLM Chat Assistant - A Streamlit Web Interface

A streamlined chat interface that connects to a vLLM API server
through the OpenAI Python client.

Features:
- Multiple chat sessions management
- Streaming response display
- Configurable API endpoint
- Real-time chat history

Requirements:
    pip install streamlit openai

Usage:
    # Start the app with default settings
    streamlit run streamlit_openai_chatbot_webserver.py

    # Start with custom vLLM API endpoint
    VLLM_API_BASE="http://your-server:8000/v1" \
        streamlit run streamlit_openai_chatbot_webserver.py

    # Enable debug mode
    streamlit run streamlit_openai_chatbot_webserver.py \
        --logger.level=debug
"""
30

31
32
33
34
35
36
37
import os
from datetime import datetime

import streamlit as st
from openai import OpenAI

# Read the API key and base URL from environment variables, falling back
# to a placeholder key and a localhost endpoint when they are unset
openai_api_key = os.environ.get("VLLM_API_KEY", "EMPTY")
openai_api_base = os.environ.get("VLLM_API_BASE", "http://localhost:8000/v1")
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

# Seed every session-state key this script reads: the chat sessions, the
# selected session markers, the visible message list, and the API base URL.
# Keys that already exist are left untouched.
for state_key, initial_value in (
    ("sessions", {}),
    ("current_session", None),
    ("messages", []),
    ("active_session", None),
    ("api_base_url", openai_api_base),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value


def create_new_chat_session():
    """Start an empty chat session and select it

    The session is stored under the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``, and the visible message list is reset.
    """
    state = st.session_state
    session_id = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    state.sessions[session_id] = []
    state.current_session = state.active_session = session_id
    state.messages = []


def switch_to_chat_session(session_id):
    """Select an existing chat session and load its messages

    Args:
        session_id: Key of an entry in ``st.session_state.sessions``
    """
    state = st.session_state
    state.current_session = state.active_session = session_id
    state.messages = state.sessions[session_id]


def get_llm_response(messages, model):
    """Request a streamed chat completion for the conversation

    Args:
        messages: List of message dictionaries
        model: Name of model

    Returns:
        The streaming response object on success; if the request raises,
        a string of the form "Error: <details>" (the error is also shown
        with st.error)
    """
    try:
        stream = client.chat.completions.create(
            messages=messages, model=model, stream=True
        )
    except Exception as exc:
        # Surface the failure in the UI and hand the caller a plain string
        st.error(f"Error details: {exc!s}")
        return f"Error: {exc!s}"
    else:
        return stream


# Sidebar - API Settings first
with st.sidebar:
    st.title("API Settings")
    new_api_base = st.text_input(
        label="API Base URL:", value=st.session_state.api_base_url
    )

# Store an edited URL and rerun the script so the new value takes effect
url_changed = new_api_base != st.session_state.api_base_url
if url_changed:
    st.session_state.api_base_url = new_api_base
    st.rerun()

st.sidebar.divider()

# Sidebar - Session Management
st.sidebar.title("Chat Sessions")
if st.sidebar.button("New Session"):
    create_new_chat_session()

# One button per session, session IDs sorted in descending order; the
# active session gets a pin label and the primary button style
for session_id in sorted(st.session_state.sessions, reverse=True):
    is_active = session_id == st.session_state.active_session
    st.sidebar.button(
        f"📍 {session_id}" if is_active else f"Session {session_id}",
        key=session_id,
        type="primary" if is_active else "secondary",
        on_click=switch_to_chat_session,
        args=(session_id,),
    )
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154

# Main interface
st.title("vLLM Chat Assistant")

# Initialize OpenAI client with API settings
client = OpenAI(api_key=openai_api_key, base_url=st.session_state.api_base_url)

# Get and display current model id.
# The base URL is editable in the sidebar, so a mistyped URL or a server
# that is down must not crash the script: report the problem and halt this
# run. The sidebar is rendered earlier in the script, so the URL can still
# be corrected.
try:
    models = client.models.list()
except Exception as e:
    st.error(
        f"Could not list models from {st.session_state.api_base_url}: {e}"
    )
    st.stop()

# An empty model list would otherwise raise IndexError on data[0]
if not models.data:
    st.error("The API server did not report any available models.")
    st.stop()

model = models.data[0].id
st.markdown(f"**Model**: {model}")

# Make sure a session is selected before anything is rendered
no_session_selected = st.session_state.current_session is None
if no_session_selected:
    create_new_chat_session()
    st.session_state.active_session = st.session_state.current_session

# Replay the stored conversation, one chat message container per entry
for past_message in st.session_state.messages:
    st.chat_message(past_message["role"]).write(past_message["content"])

# Handle user input and generate llm response
if prompt := st.chat_input("Type your message here..."):
    # Save user message to session
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Point the session entry at the live message list so the assistant
    # reply appended below is reflected in the stored session as well
    st.session_state.sessions[st.session_state.current_session] = (
        st.session_state.messages
    )

    # Display user message
    with st.chat_message("user"):
        st.write(prompt)

    # Prepare messages for llm (only the role/content fields are sent)
    messages_for_llm = [
        {"role": m["role"], "content": m["content"]} for m in st.session_state.messages
    ]

    # Generate and display llm response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # Get streaming response from llm
        response = get_llm_response(messages_for_llm, model)
        if isinstance(response, str):
            # A plain string means the request failed; show it as the reply
            message_placeholder.markdown(response)
            full_response = response
        else:
            for chunk in response:
                # A chunk may carry an empty choices list; indexing
                # choices[0] on it would raise IndexError, so skip it
                if not chunk.choices:
                    continue
                content = getattr(chunk.choices[0].delta, "content", None)
                if content:
                    full_response += content
                    # Trailing cursor marks the reply as still streaming
                    message_placeholder.markdown(full_response + "▌")

            # Final render without the cursor
            message_placeholder.markdown(full_response)

    # Save llm response to session history
    st.session_state.messages.append({"role": "assistant", "content": full_response})