Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
390be746
Unverified
Commit
390be746
Authored Oct 17, 2024 by Cyrus Leung
Committed by GitHub on Oct 17, 2024
Browse files
[Misc] Print stack trace using `logger.exception` (#9461)
parent
e312e52b
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing 8 changed files with 26 additions and 30 deletions
+26
-30
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+3
-3
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+5
-5
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+2
-2
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
+4
-5
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+4
-4
vllm/executor/multiproc_worker_utils.py
vllm/executor/multiproc_worker_utils.py
+3
-5
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+2
-2
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+3
-4
No files found.
vllm/entrypoints/openai/serving_chat.py
View file @
390be746
...
...
@@ -324,7 +324,7 @@ class OpenAIServingChat(OpenAIServing):
else
:
tool_parsers
=
[
None
]
*
num_choices
except
RuntimeError
as
e
:
logger
.
e
rror
(
"Error in tool parser creation
: %s"
,
e
)
logger
.
e
xception
(
"Error in tool parser creation
."
)
data
=
self
.
create_streaming_error_response
(
str
(
e
))
yield
f
"data:
{
data
}
\n\n
"
yield
"data: [DONE]
\n\n
"
...
...
@@ -600,7 +600,7 @@ class OpenAIServingChat(OpenAIServing):
except
ValueError
as
e
:
# TODO: Use a vllm-specific Validation Error
logger
.
e
rror
(
"
e
rror in chat completion stream generator
: %s"
,
e
)
logger
.
e
xception
(
"
E
rror in chat completion stream generator
."
)
data
=
self
.
create_streaming_error_response
(
str
(
e
))
yield
f
"data:
{
data
}
\n\n
"
# Send the final done message after all response.n are finished
...
...
@@ -687,7 +687,7 @@ class OpenAIServingChat(OpenAIServing):
try
:
tool_parser
=
self
.
tool_parser
(
tokenizer
)
except
RuntimeError
as
e
:
logger
.
e
rror
(
"Error in tool parser creation
: %s"
,
e
)
logger
.
e
xception
(
"Error in tool parser creation
."
)
return
self
.
create_error_response
(
str
(
e
))
tool_call_info
=
tool_parser
.
extract_tool_calls
(
...
...
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
View file @
390be746
...
...
@@ -103,9 +103,9 @@ class Hermes2ProToolParser(ToolParser):
tool_calls
=
tool_calls
,
content
=
content
if
content
else
None
)
except
Exception
as
e
:
logger
.
e
rror
(
"Error in extracting tool call from response %s"
,
e
)
except
Exception
:
logger
.
e
xception
(
"Error in extracting tool call from response."
)
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
content
=
model_output
)
...
...
@@ -333,6 +333,6 @@ class Hermes2ProToolParser(ToolParser):
return
delta
except
Exception
as
e
:
logger
.
e
rror
(
"Error trying to handle streaming tool call
: %s"
,
e
)
except
Exception
:
logger
.
e
xception
(
"Error trying to handle streaming tool call
."
)
return
None
# do not stream a delta. skip this token ID.
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
View file @
390be746
...
...
@@ -166,8 +166,8 @@ class Internlm2ToolParser(ToolParser):
tool_call_arr
[
"arguments"
]
=
self
.
get_argments
(
tool_call_arr
)
self
.
prev_tool_call_arr
=
[
tool_call_arr
]
return
delta
except
Exception
as
e
:
logger
.
e
rror
(
"Error trying to handle streaming tool call
: %s"
,
e
)
except
Exception
:
logger
.
e
xception
(
"Error trying to handle streaming tool call
."
)
logger
.
debug
(
"Skipping chunk as a result of tool streaming extraction "
"error"
)
...
...
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
View file @
390be746
...
...
@@ -112,9 +112,8 @@ class Llama3JsonToolParser(ToolParser):
content
=
None
)
return
ret
except
Exception
as
e
:
logger
.
error
(
"Error in extracting tool call from response: %s"
,
e
)
print
(
"ERROR"
,
e
)
except
Exception
:
logger
.
exception
(
"Error in extracting tool call from response."
)
# return information to just treat the tool call as regular JSON
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
...
...
@@ -269,8 +268,8 @@ class Llama3JsonToolParser(ToolParser):
self
.
prev_tool_call_arr
=
tool_call_arr
return
delta
except
Exception
as
e
:
logger
.
e
rror
(
"Error trying to handle streaming tool call
: %s"
,
e
)
except
Exception
:
logger
.
e
xception
(
"Error trying to handle streaming tool call
."
)
logger
.
debug
(
"Skipping chunk as a result of tool streaming extraction "
"error"
)
...
...
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
View file @
390be746
...
...
@@ -111,8 +111,8 @@ class MistralToolParser(ToolParser):
tool_calls
=
tool_calls
,
content
=
content
if
len
(
content
)
>
0
else
None
)
except
Exception
as
e
:
logger
.
e
rror
(
"Error in extracting tool call from response
: %s"
,
e
)
except
Exception
:
logger
.
e
xception
(
"Error in extracting tool call from response
."
)
# return information to just treat the tool call as regular JSON
return
ExtractedToolCallInformation
(
tools_called
=
False
,
tool_calls
=
[],
...
...
@@ -298,8 +298,8 @@ class MistralToolParser(ToolParser):
self
.
prev_tool_call_arr
=
tool_call_arr
return
delta
except
Exception
as
e
:
logger
.
e
rror
(
"Error trying to handle streaming tool call
: %s"
,
e
)
except
Exception
:
logger
.
e
xception
(
"Error trying to handle streaming tool call
."
)
logger
.
debug
(
"Skipping chunk as a result of tool streaming extraction "
"error"
)
...
...
vllm/executor/multiproc_worker_utils.py
View file @
390be746
...
...
@@ -3,7 +3,6 @@ import multiprocessing
import
os
import
sys
import
threading
import
traceback
import
uuid
from
dataclasses
import
dataclass
from
multiprocessing
import
Queue
...
...
@@ -227,10 +226,9 @@ def _run_worker_process(
except
KeyboardInterrupt
:
break
except
BaseException
as
e
:
tb
=
traceback
.
format_exc
()
logger
.
error
(
"Exception in worker %s while processing method %s: %s, %s"
,
process_name
,
method
,
e
,
tb
)
logger
.
exception
(
"Exception in worker %s while processing method %s."
,
process_name
,
method
)
exception
=
e
result_queue
.
put
(
Result
(
task_id
=
task_id
,
value
=
output
,
exception
=
exception
))
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
390be746
...
...
@@ -499,8 +499,8 @@ def kv_cache_scales_loader(
logger
.
error
(
"File or directory '%s' not found."
,
filename
)
except
json
.
JSONDecodeError
:
logger
.
error
(
"Error decoding JSON in file '%s'."
,
filename
)
except
Exception
as
e
:
logger
.
e
rror
(
"An error occurred while reading '%s'
: %s
"
,
filename
,
e
)
except
Exception
:
logger
.
e
xception
(
"An error occurred while reading '%s'
.
"
,
filename
)
# This section is reached if and only if any of the excepts are hit
# Return an empty iterable (list) => no KV cache scales are loaded
# which ultimately defaults to 1.0 scales
...
...
vllm/platforms/cuda.py
View file @
390be746
...
...
@@ -137,10 +137,9 @@ class CudaPlatform(Platform):
pynvml
.
NVML_P2P_CAPS_INDEX_NVLINK
)
if
p2p_status
!=
pynvml
.
NVML_P2P_STATUS_OK
:
return
False
except
pynvml
.
NVMLError
as
error
:
logger
.
e
rror
(
except
pynvml
.
NVMLError
:
logger
.
e
xception
(
"NVLink detection failed. This is normal if your"
" machine has no NVLink equipped."
,
exc_info
=
error
)
" machine has no NVLink equipped."
)
return
False
return
True
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.