Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e750697
Unverified
Commit
3e750697
authored
Jun 16, 2025
by
Navanit Dubey
Committed by
GitHub
Jun 16, 2025
Browse files
[DOC] Add reasoning capability to vLLM streamlit code (#19557)
parent
ee35e96a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
165 additions
and
43 deletions
+165
-43
examples/online_serving/streamlit_openai_chatbot_webserver.py
...ples/online_serving/streamlit_openai_chatbot_webserver.py
+165
-43
No files found.
examples/online_serving/streamlit_openai_chatbot_webserver.py
View file @
3e750697
...
...
@@ -11,6 +11,7 @@ Features:
- Streaming response display
- Configurable API endpoint
- Real-time chat history
- Reasoning Display: Optional thinking process visualization
Requirements:
pip install streamlit openai
...
...
@@ -51,13 +52,33 @@ if "messages" not in st.session_state:
# Seed session-state entries that are not present yet; values that already
# exist in st.session_state are left untouched.
_session_defaults = {
    "active_session": None,
    # Reasoning text captured for assistant messages
    "show_reasoning": {},
    # API base URL, initialised from the configured default
    "api_base_url": openai_api_base,
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
def
create_new_chat_session
():
"""Create a new chat session with timestamp as ID"""
"""Create a new chat session with timestamp as unique identifier.
This function initializes a new chat session by:
1. Generating a timestamp-based session ID
2. Creating an empty message list for the new session
3. Setting the new session as both current and active session
4. Resetting the messages list for the new session
Returns:
None
Session State Updates:
- sessions: Adds new empty message list with timestamp key
- current_session: Sets to new session ID
- active_session: Sets to new session ID
- messages: Resets to empty list
"""
session_id
=
datetime
.
now
().
strftime
(
"%Y-%m-%d %H:%M:%S"
)
st
.
session_state
.
sessions
[
session_id
]
=
[]
st
.
session_state
.
current_session
=
session_id
...
...
@@ -66,30 +87,98 @@ def create_new_chat_session():
def switch_to_chat_session(session_id):
    """Switch the active chat context to a different session.

    Args:
        session_id (str): Key of the session in ``st.session_state.sessions``
            to switch to.

    Session State Updates:
        - current_session: Set to ``session_id``
        - active_session: Set to ``session_id``
        - messages: Rebound to ``sessions[session_id]``

    Raises:
        KeyError: If ``session_id`` is not a key of ``sessions``.
    """
    state = st.session_state
    state.current_session = session_id
    state.active_session = session_id
    # Bind the stored list itself rather than a copy, so later in-place
    # appends to ``messages`` also show up in ``sessions[session_id]``.
    state.messages = state.sessions[session_id]
def get_llm_response(
    messages, model, reason=False, content_ph=None, reasoning_ph=None
):
    """Generate and stream an LLM response with an optional reasoning process.

    Args:
        messages (list): Conversation message dicts with 'role' and 'content'.
        model (str): The model identifier to use for generation.
        reason (bool): Whether to request and collect the reasoning process.
        content_ph: Optional Streamlit placeholder that receives the
            streamed response text.
        reasoning_ph: Optional Streamlit placeholder that hosts the live
            reasoning expander.

    Returns:
        tuple: (str, str)
            - The complete response text, or ``"Error: ..."`` if the API
              call failed.
            - The complete reasoning text (empty unless ``reason`` is true
              and the server streamed reasoning content).

    Note:
        Text is accumulated whether or not placeholders are supplied; the
        placeholders only control live rendering. Failures are reported via
        ``st.error`` and returned as an error string instead of being raised.
    """
    full_text = ""
    think_text = ""
    live_think = None

    # Build request parameters
    params = {"model": model, "messages": messages, "stream": True}
    if reason:
        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}

    try:
        response = client.chat.completions.create(**params)

        # Prepare reasoning expander above content
        if reason and reasoning_ph is not None:
            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
            live_think = exp.empty()

        for chunk in response:
            # Skip chunks that carry no choices instead of raising IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            # Stream reasoning first
            reasoning_piece = (
                getattr(delta, "reasoning_content", None) if reason else None
            )
            if reasoning_piece:
                think_text += reasoning_piece
                if live_think is not None:
                    live_think.markdown(think_text + "▌")

            # Then stream content
            content_piece = getattr(delta, "content", None)
            if content_piece:
                full_text += content_piece
                if content_ph is not None:
                    content_ph.markdown(full_text + "▌")

        # Finalize displays without the cursor glyph
        if live_think is not None:
            live_think.markdown(think_text)
        if content_ph is not None:
            content_ph.markdown(full_text)

        return full_text, think_text
    except Exception as e:
        # UI boundary: surface the failure to the user and hand the caller a
        # printable message rather than letting the script run abort.
        st.error(f"Error details: {str(e)}")
        return f"Error: {str(e)}", ""
# Sidebar - API Settings first
...
...
@@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
if
st
.
sidebar
.
button
(
"New Session"
):
create_new_chat_session
()
# Display all sessions in reverse chronological order
for
session_id
in
sorted
(
st
.
session_state
.
sessions
.
keys
(),
reverse
=
True
):
# Mark the active session with a pinned button
...
...
@@ -143,47 +233,79 @@ if st.session_state.current_session is None:
create_new_chat_session
()
st
.
session_state
.
active_session
=
st
.
session_state
.
current_session
# Display chat history for current session
# Render the stored chat history exactly once per script run.
# NOTE(review): show_reasoning is keyed by message index only; indexes from
# different chat sessions may collide — confirm and key by session if so.
for idx, msg in enumerate(st.session_state.messages):
    # Render user messages normally
    if msg["role"] == "user":
        with st.chat_message("user"):
            st.write(msg["content"])
        continue

    # Every other role is rendered as an assistant message. Stored reasoning
    # goes inside the bubble, above the content, matching the layout used
    # while a response is being streamed.
    with st.chat_message("assistant"):
        if idx in st.session_state.show_reasoning:
            with st.expander("💭 Thinking Process", expanded=False):
                st.markdown(st.session_state.show_reasoning[idx])
        st.write(msg["content"])
# Setup & Cache reasoning support check
@st.cache_data(show_spinner=False)
def _probe_reasoning_support(model_name, api_base):
    """Send one short request and report whether reasoning content came back.

    Args:
        model_name (str): Model identifier to probe.
        api_base: Not used in the body; it is only part of the cache key so
            the probe is repeated when the endpoint changes. It must not be
            underscore-prefixed, or Streamlit would leave it out of the key.

    Returns:
        bool: True if the first choice carries non-empty ``reasoning_content``.
    """
    resp = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "Hi"}],
        stream=False,
    )
    if not resp.choices:
        return False
    return bool(getattr(resp.choices[0].message, "reasoning_content", None))


def server_supports_reasoning():
    """Check if the current model supports reasoning capability.

    The result is cached per (model, API base URL) pair rather than once for
    the whole app, so changing either one triggers a fresh probe.

    Returns:
        bool: True if the model supports reasoning, False otherwise. A
        failed probe is reported as False.
    """
    # presumably the client is built from this URL — TODO confirm
    api_base = st.session_state.get("api_base_url")
    try:
        return _probe_reasoning_support(model, api_base)
    except Exception:
        # UI boundary: an unreachable server must not abort the script run.
        # The exception leaves the cached helper, so no negative result is
        # stored and the probe is retried on the next run.
        return False
# Handle user input and generate llm response
# Check support
supports_reasoning = server_supports_reasoning()

# Offer the reasoning toggle only when the server supports it; otherwise show
# a greyed-out notice and keep reasoning switched off.
if not supports_reasoning:
    st.sidebar.markdown(
        "<span style='color:gray;'>Reasoning unavailable for this model.</span>",
        unsafe_allow_html=True,
    )
    reason = False
else:
    reason = st.sidebar.checkbox("Enable Reasoning", value=False)
# Update the input handling section
if prompt := st.chat_input("Type your message here..."):
    # Save and display user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.sessions[st.session_state.current_session] = (
        st.session_state.messages
    )
    with st.chat_message("user"):
        st.write(prompt)

    # Prepare LLM messages: forward only the role/content pair of each entry
    msgs = [
        {"role": m["role"], "content": m["content"]}
        for m in st.session_state.messages
    ]

    # Stream assistant response
    with st.chat_message("assistant"):
        # Placeholders: reasoning above, content below
        reason_ph = st.empty()
        content_ph = st.empty()

        # get_llm_response streams into the placeholders and returns the
        # final texts, so no second streaming loop is needed here.
        full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)

        # Index the new assistant message will occupy once appended
        message_index = len(st.session_state.messages)

        # Save assistant reply (appended exactly once)
        st.session_state.messages.append({"role": "assistant", "content": full})

        # Persist reasoning in session state if any
        # NOTE(review): the key is the message index only; indexes from
        # different chat sessions may collide — confirm.
        if reason and think:
            st.session_state.show_reasoning[message_index] = think
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment