Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e750697
Unverified
Commit
3e750697
authored
Jun 16, 2025
by
Navanit Dubey
Committed by
GitHub
Jun 16, 2025
Browse files
[DOC] Add reasoning capability to vLLM streamlit code (#19557)
parent
ee35e96a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
165 additions
and
43 deletions
+165
-43
examples/online_serving/streamlit_openai_chatbot_webserver.py
...ples/online_serving/streamlit_openai_chatbot_webserver.py
+165
-43
No files found.
examples/online_serving/streamlit_openai_chatbot_webserver.py
View file @
3e750697
...
@@ -11,6 +11,7 @@ Features:
...
@@ -11,6 +11,7 @@ Features:
- Streaming response display
- Streaming response display
- Configurable API endpoint
- Configurable API endpoint
- Real-time chat history
- Real-time chat history
- Reasoning Display: Optional thinking process visualization
Requirements:
Requirements:
pip install streamlit openai
pip install streamlit openai
...
@@ -51,13 +52,33 @@ if "messages" not in st.session_state:
...
@@ -51,13 +52,33 @@ if "messages" not in st.session_state:
if
"active_session"
not
in
st
.
session_state
:
if
"active_session"
not
in
st
.
session_state
:
st
.
session_state
.
active_session
=
None
st
.
session_state
.
active_session
=
None
# Add new session state for reasoning
if
"show_reasoning"
not
in
st
.
session_state
:
st
.
session_state
.
show_reasoning
=
{}
# Initialize session state for API base URL
# Initialize session state for API base URL
if
"api_base_url"
not
in
st
.
session_state
:
if
"api_base_url"
not
in
st
.
session_state
:
st
.
session_state
.
api_base_url
=
openai_api_base
st
.
session_state
.
api_base_url
=
openai_api_base
def
create_new_chat_session
():
def
create_new_chat_session
():
"""Create a new chat session with timestamp as ID"""
"""Create a new chat session with timestamp as unique identifier.
This function initializes a new chat session by:
1. Generating a timestamp-based session ID
2. Creating an empty message list for the new session
3. Setting the new session as both current and active session
4. Resetting the messages list for the new session
Returns:
None
Session State Updates:
- sessions: Adds new empty message list with timestamp key
- current_session: Sets to new session ID
- active_session: Sets to new session ID
- messages: Resets to empty list
"""
session_id
=
datetime
.
now
().
strftime
(
"%Y-%m-%d %H:%M:%S"
)
session_id
=
datetime
.
now
().
strftime
(
"%Y-%m-%d %H:%M:%S"
)
st
.
session_state
.
sessions
[
session_id
]
=
[]
st
.
session_state
.
sessions
[
session_id
]
=
[]
st
.
session_state
.
current_session
=
session_id
st
.
session_state
.
current_session
=
session_id
...
@@ -66,30 +87,98 @@ def create_new_chat_session():
...
@@ -66,30 +87,98 @@ def create_new_chat_session():
def switch_to_chat_session(session_id):
    """Make ``session_id`` the chat session shown and marked as active.

    Args:
        session_id (str): Key of the target session in
            ``st.session_state.sessions``.

    Session State Updates:
        - current_session: set to ``session_id``
        - active_session: set to ``session_id``
        - messages: rebound to ``sessions[session_id]``
    """
    state = st.session_state
    state.current_session = state.active_session = session_id
    # No copy is made: ``messages`` and the stored session entry are the
    # same list object after this assignment.
    state.messages = state.sessions[session_id]
def get_llm_response(
    messages, model, reason=False, content_ph=None, reasoning_ph=None
):
    """Generate and stream an LLM response with an optional reasoning process.

    Args:
        messages (list): Conversation message dicts with 'role' and 'content'.
        model (str): The model identifier to use for generation.
        reason (bool): Whether to request and collect the reasoning process.
            Defaults to False so the two-argument call form keeps working.
        content_ph: Optional placeholder (e.g. ``st.empty()``) that is
            updated with the response text while it streams.
        reasoning_ph: Optional placeholder that hosts the live reasoning
            expander when ``reason`` is true.

    Returns:
        tuple: (str, str)
            - First string contains the complete response text
            - Second string contains the complete reasoning text
              (empty unless ``reason`` is true and the server sent any)
        If the API call fails, the first string is ``"Error: <details>"``
        and the second is empty.

    Note:
        Exceptions are not propagated to the caller; they are shown with
        ``st.error`` and reported through the return value.
        Text is accumulated whether or not placeholders are supplied; the
        placeholders only control what is rendered live.
    """
    full_text = ""
    think_text = ""
    live_think = None

    # Build request parameters
    params = {"model": model, "messages": messages, "stream": True}
    if reason:
        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}

    try:
        response = client.chat.completions.create(**params)

        if isinstance(response, str):
            if content_ph is not None:
                content_ph.markdown(response)
            return response, ""

        # Prepare reasoning expander above content
        if reason and reasoning_ph is not None:
            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
            live_think = exp.empty()

        # Stream chunks
        for chunk in response:
            # A chunk may carry no choices (e.g. a usage-only chunk); indexing
            # it would raise and throw away everything streamed so far.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            # Stream reasoning first
            if reason:
                rc = getattr(delta, "reasoning_content", None)
                if rc:
                    think_text += rc
                    if live_think is not None:
                        live_think.markdown(think_text + "▌")

            # Then stream content
            piece = getattr(delta, "content", None)
            if piece:
                full_text += piece
                if content_ph is not None:
                    content_ph.markdown(full_text + "▌")

        # Finalize displays (drop the cursor): reasoning above, content below
        if live_think is not None:
            live_think.markdown(think_text)
        if content_ph is not None:
            content_ph.markdown(full_text)

        return full_text, think_text
    except Exception as e:
        st.error(f"Error details: {str(e)}")
        return f"Error: {str(e)}", ""
# Sidebar - API Settings first
# Sidebar - API Settings first
...
@@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
...
@@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
if
st
.
sidebar
.
button
(
"New Session"
):
if
st
.
sidebar
.
button
(
"New Session"
):
create_new_chat_session
()
create_new_chat_session
()
# Display all sessions in reverse chronological order
# Display all sessions in reverse chronological order
for
session_id
in
sorted
(
st
.
session_state
.
sessions
.
keys
(),
reverse
=
True
):
for
session_id
in
sorted
(
st
.
session_state
.
sessions
.
keys
(),
reverse
=
True
):
# Mark the active session with a pinned button
# Mark the active session with a pinned button
...
@@ -143,47 +233,79 @@ if st.session_state.current_session is None:
...
@@ -143,47 +233,79 @@ if st.session_state.current_session is None:
create_new_chat_session
()
create_new_chat_session
()
st
.
session_state
.
active_session
=
st
.
session_state
.
current_session
st
.
session_state
.
active_session
=
st
.
session_state
.
current_session
# Display chat history for current session
# Update the chat history display section
for
message
in
st
.
session_state
.
messages
:
for
idx
,
msg
in
enumerate
(
st
.
session_state
.
messages
):
with
st
.
chat_message
(
message
[
"role"
]):
# Render user messages normally
st
.
write
(
message
[
"content"
])
if
msg
[
"role"
]
==
"user"
:
with
st
.
chat_message
(
"user"
):
st
.
write
(
msg
[
"content"
])
# Render assistant messages with reasoning above
else
:
# If reasoning exists for this assistant message, show it above the content
if
idx
in
st
.
session_state
.
show_reasoning
:
with
st
.
expander
(
"💭 Thinking Process"
,
expanded
=
False
):
st
.
markdown
(
st
.
session_state
.
show_reasoning
[
idx
])
with
st
.
chat_message
(
"assistant"
):
st
.
write
(
msg
[
"content"
])
# Setup & Cache reasoning support check
@st.cache_data(show_spinner=False)
def server_supports_reasoning():
    """Probe the server with one short request to detect reasoning output.

    Sends a single non-streaming "Hi" chat completion using the module-level
    ``client`` and ``model`` and inspects the first returned choice.

    Returns:
        bool: True when the first choice's message has a non-empty
        ``reasoning_content`` attribute, False otherwise.
    """
    # NOTE(review): the function takes no arguments, so the cached result is
    # presumably not keyed on the model or endpoint in use - confirm.
    probe = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": "Hi"}],
        stream=False,
    )
    reply = probe.choices[0].message
    if not hasattr(reply, "reasoning_content"):
        return False
    return bool(reply.reasoning_content)
# Handle user input and generate llm response
# Check support
supports_reasoning
=
server_supports_reasoning
()
# Add reasoning toggle in sidebar if supported
reason
=
False
# Default to False
if
supports_reasoning
:
reason
=
st
.
sidebar
.
checkbox
(
"Enable Reasoning"
,
value
=
False
)
else
:
st
.
sidebar
.
markdown
(
"<span style='color:gray;'>Reasoning unavailable for this model.</span>"
,
unsafe_allow_html
=
True
,
)
# reason remains False
# Update the input handling section
if
prompt
:
=
st
.
chat_input
(
"Type your message here..."
):
if
prompt
:
=
st
.
chat_input
(
"Type your message here..."
):
# Save user message to session
# Save and display user message
st
.
session_state
.
messages
.
append
({
"role"
:
"user"
,
"content"
:
prompt
})
st
.
session_state
.
messages
.
append
({
"role"
:
"user"
,
"content"
:
prompt
})
st
.
session_state
.
sessions
[
st
.
session_state
.
current_session
]
=
(
st
.
session_state
.
sessions
[
st
.
session_state
.
current_session
]
=
(
st
.
session_state
.
messages
st
.
session_state
.
messages
)
)
# Display user message
with
st
.
chat_message
(
"user"
):
with
st
.
chat_message
(
"user"
):
st
.
write
(
prompt
)
st
.
write
(
prompt
)
# Prepare messages for llm
# Prepare LLM messages
m
essages_for_llm
=
[
m
sgs
=
[
{
"role"
:
m
[
"role"
],
"content"
:
m
[
"content"
]}
for
m
in
st
.
session_state
.
messages
{
"role"
:
m
[
"role"
],
"content"
:
m
[
"content"
]}
for
m
in
st
.
session_state
.
messages
]
]
# Generate and display llm response
# Stream assistant response
with
st
.
chat_message
(
"assistant"
):
with
st
.
chat_message
(
"assistant"
):
message_placeholder
=
st
.
empty
()
# Placeholders: reasoning above, content below
full_response
=
""
reason_ph
=
st
.
empty
()
content_ph
=
st
.
empty
()
# Get streaming response from llm
full
,
think
=
get_llm_response
(
msgs
,
model
,
reason
,
content_ph
,
reason_ph
)
response
=
get_llm_response
(
messages_for_llm
,
model
)
# Determine index for this new assistant message
if
isinstance
(
response
,
str
):
message_index
=
len
(
st
.
session_state
.
messages
)
message_placeholder
.
markdown
(
response
)
# Save assistant reply
full_response
=
response
st
.
session_state
.
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
full
})
else
:
# Persist reasoning in session state if any
for
chunk
in
response
:
if
reason
and
think
:
if
hasattr
(
chunk
.
choices
[
0
].
delta
,
"content"
):
st
.
session_state
.
show_reasoning
[
message_index
]
=
think
content
=
chunk
.
choices
[
0
].
delta
.
content
if
content
:
full_response
+=
content
message_placeholder
.
markdown
(
full_response
+
"▌"
)
message_placeholder
.
markdown
(
full_response
)
# Save llm response to session history
st
.
session_state
.
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
full_response
})
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment