Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
390be746
Unverified
Commit
390be746
authored
Oct 17, 2024
by
Cyrus Leung
Committed by
GitHub
Oct 17, 2024
Browse files
[Misc] Print stack trace using `logger.exception` (#9461)
parent
e312e52b
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
26 additions
and
30 deletions
+26
-30
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+3
-3
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+5
-5
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
.../entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+2
-2
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
+4
-5
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+4
-4
vllm/executor/multiproc_worker_utils.py
vllm/executor/multiproc_worker_utils.py
+3
-5
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+2
-2
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+3
-4
No files found.
vllm/entrypoints/openai/serving_chat.py
View file @
390be746
...
@@ -324,7 +324,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -324,7 +324,7 @@ class OpenAIServingChat(OpenAIServing):
else
:
else
:
tool_parsers
=
[
None
]
*
num_choices
tool_parsers
=
[
None
]
*
num_choices
except
RuntimeError
as
e
:
except
RuntimeError
as
e
:
- logger.error("Error in tool parser creation: %s", e)
+ logger.exception("Error in tool parser creation.")
data
=
self
.
create_streaming_error_response
(
str
(
e
))
data
=
self
.
create_streaming_error_response
(
str
(
e
))
yield
f
"data:
{
data
}
\n\n
"
yield
f
"data:
{
data
}
\n\n
"
yield
"data: [DONE]
\n\n
"
yield
"data: [DONE]
\n\n
"
...
@@ -600,7 +600,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -600,7 +600,7 @@ class OpenAIServingChat(OpenAIServing):
except
ValueError
as
e
:
except
ValueError
as
e
:
# TODO: Use a vllm-specific Validation Error
# TODO: Use a vllm-specific Validation Error
- logger.error("error in chat completion stream generator: %s", e)
+ logger.exception("Error in chat completion stream generator.")
data
=
self
.
create_streaming_error_response
(
str
(
e
))
data
=
self
.
create_streaming_error_response
(
str
(
e
))
yield
f
"data:
{
data
}
\n\n
"
yield
f
"data:
{
data
}
\n\n
"
# Send the final done message after all response.n are finished
# Send the final done message after all response.n are finished
...
@@ -687,7 +687,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -687,7 +687,7 @@ class OpenAIServingChat(OpenAIServing):
try
:
try
:
tool_parser
=
self
.
tool_parser
(
tokenizer
)
tool_parser
=
self
.
tool_parser
(
tokenizer
)
except
RuntimeError
as
e
:
except
RuntimeError
as
e
:
- logger.error("Error in tool parser creation: %s", e)
+ logger.exception("Error in tool parser creation.")
return
self
.
create_error_response
(
str
(
e
))
return
self
.
create_error_response
(
str
(
e
))
tool_call_info
=
tool_parser
.
extract_tool_calls
(
tool_call_info
=
tool_parser
.
extract_tool_calls
(
...
...
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
View file @
390be746
...
@@ -103,9 +103,9 @@ class Hermes2ProToolParser(ToolParser):
...
@@ -103,9 +103,9 @@ class Hermes2ProToolParser(ToolParser):
tool_calls
=
tool_calls
,
tool_calls
=
tool_calls
,
content
=
content
if
content
else
None
)
content
=
content
if
content
else
None
)
- except Exception as e:
-     logger.error("Error in extracting tool call from response %s",
-                  e)
+ except Exception:
+     logger.exception(
+         "Error in extracting tool call from response.")
return
ExtractedToolCallInformation
(
tools_called
=
False
,
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
tool_calls
=
[],
content
=
model_output
)
content
=
model_output
)
...
@@ -333,6 +333,6 @@ class Hermes2ProToolParser(ToolParser):
...
@@ -333,6 +333,6 @@ class Hermes2ProToolParser(ToolParser):
return
delta
return
delta
- except Exception as e:
-     logger.error("Error trying to handle streaming tool call: %s", e)
+ except Exception:
+     logger.exception("Error trying to handle streaming tool call.")
return
None
# do not stream a delta. skip this token ID.
return
None
# do not stream a delta. skip this token ID.
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
View file @
390be746
...
@@ -166,8 +166,8 @@ class Internlm2ToolParser(ToolParser):
...
@@ -166,8 +166,8 @@ class Internlm2ToolParser(ToolParser):
tool_call_arr
[
"arguments"
]
=
self
.
get_argments
(
tool_call_arr
)
tool_call_arr
[
"arguments"
]
=
self
.
get_argments
(
tool_call_arr
)
self
.
prev_tool_call_arr
=
[
tool_call_arr
]
self
.
prev_tool_call_arr
=
[
tool_call_arr
]
return
delta
return
delta
- except Exception as e:
-     logger.error("Error trying to handle streaming tool call: %s", e)
+ except Exception:
+     logger.exception("Error trying to handle streaming tool call.")
logger
.
debug
(
logger
.
debug
(
"Skipping chunk as a result of tool streaming extraction "
"Skipping chunk as a result of tool streaming extraction "
"error"
)
"error"
)
...
...
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
View file @
390be746
...
@@ -112,9 +112,8 @@ class Llama3JsonToolParser(ToolParser):
...
@@ -112,9 +112,8 @@ class Llama3JsonToolParser(ToolParser):
content
=
None
)
content
=
None
)
return
ret
return
ret
- except Exception as e:
-     logger.error("Error in extracting tool call from response: %s", e)
-     print("ERROR", e)
+ except Exception:
+     logger.exception("Error in extracting tool call from response.")
# return information to just treat the tool call as regular JSON
# return information to just treat the tool call as regular JSON
return
ExtractedToolCallInformation
(
tools_called
=
False
,
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
tool_calls
=
[],
...
@@ -269,8 +268,8 @@ class Llama3JsonToolParser(ToolParser):
...
@@ -269,8 +268,8 @@ class Llama3JsonToolParser(ToolParser):
self
.
prev_tool_call_arr
=
tool_call_arr
self
.
prev_tool_call_arr
=
tool_call_arr
return
delta
return
delta
- except Exception as e:
-     logger.error("Error trying to handle streaming tool call: %s", e)
+ except Exception:
+     logger.exception("Error trying to handle streaming tool call.")
logger
.
debug
(
logger
.
debug
(
"Skipping chunk as a result of tool streaming extraction "
"Skipping chunk as a result of tool streaming extraction "
"error"
)
"error"
)
...
...
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
View file @
390be746
...
@@ -111,8 +111,8 @@ class MistralToolParser(ToolParser):
...
@@ -111,8 +111,8 @@ class MistralToolParser(ToolParser):
tool_calls
=
tool_calls
,
tool_calls
=
tool_calls
,
content
=
content
if
len
(
content
)
>
0
else
None
)
content
=
content
if
len
(
content
)
>
0
else
None
)
- except Exception as e:
-     logger.error("Error in extracting tool call from response: %s", e)
+ except Exception:
+     logger.exception("Error in extracting tool call from response.")
# return information to just treat the tool call as regular JSON
# return information to just treat the tool call as regular JSON
return
ExtractedToolCallInformation
(
tools_called
=
False
,
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
tool_calls
=
[],
...
@@ -298,8 +298,8 @@ class MistralToolParser(ToolParser):
...
@@ -298,8 +298,8 @@ class MistralToolParser(ToolParser):
self
.
prev_tool_call_arr
=
tool_call_arr
self
.
prev_tool_call_arr
=
tool_call_arr
return
delta
return
delta
- except Exception as e:
-     logger.error("Error trying to handle streaming tool call: %s", e)
+ except Exception:
+     logger.exception("Error trying to handle streaming tool call.")
logger
.
debug
(
logger
.
debug
(
"Skipping chunk as a result of tool streaming extraction "
"Skipping chunk as a result of tool streaming extraction "
"error"
)
"error"
)
...
...
vllm/executor/multiproc_worker_utils.py
View file @
390be746
...
@@ -3,7 +3,6 @@ import multiprocessing
...
@@ -3,7 +3,6 @@ import multiprocessing
import
os
import
os
import
sys
import
sys
import
threading
import
threading
- import traceback
import
uuid
import
uuid
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
multiprocessing
import
Queue
from
multiprocessing
import
Queue
...
@@ -227,10 +226,9 @@ def _run_worker_process(
...
@@ -227,10 +226,9 @@ def _run_worker_process(
except
KeyboardInterrupt
:
except
KeyboardInterrupt
:
break
break
except
BaseException
as
e
:
except
BaseException
as
e
:
- tb = traceback.format_exc()
- logger.error(
-     "Exception in worker %s while processing method %s: %s, %s",
-     process_name, method, e, tb)
+ logger.exception(
+     "Exception in worker %s while processing method %s.",
+     process_name, method)
exception
=
e
exception
=
e
result_queue
.
put
(
result_queue
.
put
(
Result
(
task_id
=
task_id
,
value
=
output
,
exception
=
exception
))
Result
(
task_id
=
task_id
,
value
=
output
,
exception
=
exception
))
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
390be746
...
@@ -499,8 +499,8 @@ def kv_cache_scales_loader(
...
@@ -499,8 +499,8 @@ def kv_cache_scales_loader(
logger
.
error
(
"File or directory '%s' not found."
,
filename
)
logger
.
error
(
"File or directory '%s' not found."
,
filename
)
except
json
.
JSONDecodeError
:
except
json
.
JSONDecodeError
:
logger
.
error
(
"Error decoding JSON in file '%s'."
,
filename
)
logger
.
error
(
"Error decoding JSON in file '%s'."
,
filename
)
- except Exception as e:
-     logger.error("An error occurred while reading '%s': %s", filename, e)
+ except Exception:
+     logger.exception("An error occurred while reading '%s'.", filename)
# This section is reached if and only if any of the excepts are hit
# This section is reached if and only if any of the excepts are hit
# Return an empty iterable (list) => no KV cache scales are loaded
# Return an empty iterable (list) => no KV cache scales are loaded
# which ultimately defaults to 1.0 scales
# which ultimately defaults to 1.0 scales
...
...
vllm/platforms/cuda.py
View file @
390be746
...
@@ -137,10 +137,9 @@ class CudaPlatform(Platform):
...
@@ -137,10 +137,9 @@ class CudaPlatform(Platform):
pynvml
.
NVML_P2P_CAPS_INDEX_NVLINK
)
pynvml
.
NVML_P2P_CAPS_INDEX_NVLINK
)
if
p2p_status
!=
pynvml
.
NVML_P2P_STATUS_OK
:
if
p2p_status
!=
pynvml
.
NVML_P2P_STATUS_OK
:
return
False
return
False
- except pynvml.NVMLError as error:
-     logger.error(
-         "NVLink detection failed. This is normal if your"
-         " machine has no NVLink equipped.",
-         exc_info=error)
+ except pynvml.NVMLError:
+     logger.exception(
+         "NVLink detection failed. This is normal if your"
+         " machine has no NVLink equipped.")
return
False
return
False
return
True
return
True
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment