[Misc] Print stack trace using `logger.exception` (#9461)

390be746 · Cyrus Leung · GitHub · e312e52b · 390be746 · 390be746
Unverified commit 390be746, authored Oct 17, 2024 by Cyrus Leung; committed by GitHub on Oct 17, 2024
8 changed files
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -324,7 +324,7 @@ class OpenAIServingChat(OpenAIServing):
            else:
                tool_parsers = [None] * num_choices
        except RuntimeError as e:
-            logger.error("Error in tool parser creation: %s", e)
+            logger.exception("Error in tool parser creation.")
            data = self.create_streaming_error_response(str(e))
            yield f"data: {data}\n\n"
            yield "data: [DONE]\n\n"
@@ -600,7 +600,7 @@ class OpenAIServingChat(OpenAIServing):

        except ValueError as e:
            # TODO: Use a vllm-specific Validation Error
-            logger.error("error in chat completion stream generator: %s", e)
+            logger.exception("Error in chat completion stream generator.")
            data = self.create_streaming_error_response(str(e))
            yield f"data: {data}\n\n"
        # Send the final done message after all response.n are finished
@@ -687,7 +687,7 @@ class OpenAIServingChat(OpenAIServing):
                try:
                    tool_parser = self.tool_parser(tokenizer)
                except RuntimeError as e:
-                    logger.error("Error in tool parser creation: %s", e)
+                    logger.exception("Error in tool parser creation.")
                    return self.create_error_response(str(e))

                tool_call_info = tool_parser.extract_tool_calls(

--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@@ -103,9 +103,9 @@ class Hermes2ProToolParser(ToolParser):
                    tool_calls=tool_calls,
                    content=content if content else None)

-            except Exception as e:
-                logger.error("Error in extracting tool call from response %s",
-                             e)
+            except Exception:
+                logger.exception(
+                    "Error in extracting tool call from response.")
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)
@@ -333,6 +333,6 @@ class Hermes2ProToolParser(ToolParser):

            return delta

-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
            return None  # do not stream a delta. skip this token ID.
--- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@@ -166,8 +166,8 @@ class Internlm2ToolParser(ToolParser):
            tool_call_arr["arguments"] = self.get_argments(tool_call_arr)
            self.prev_tool_call_arr = [tool_call_arr]
            return delta
-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")

--- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
@@ -112,9 +112,8 @@ class Llama3JsonToolParser(ToolParser):
                                               content=None)
            return ret

-        except Exception as e:
-            logger.error("Error in extracting tool call from response: %s", e)
-            print("ERROR", e)
+        except Exception:
+            logger.exception("Error in extracting tool call from response.")
            # return information to just treat the tool call as regular JSON
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
@@ -269,8 +268,8 @@ class Llama3JsonToolParser(ToolParser):
            self.prev_tool_call_arr = tool_call_arr
            return delta

-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")

--- a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@@ -111,8 +111,8 @@ class MistralToolParser(ToolParser):
                tool_calls=tool_calls,
                content=content if len(content) > 0 else None)

-        except Exception as e:
-            logger.error("Error in extracting tool call from response: %s", e)
+        except Exception:
+            logger.exception("Error in extracting tool call from response.")
            # return information to just treat the tool call as regular JSON
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
@@ -298,8 +298,8 @@ class MistralToolParser(ToolParser):
            self.prev_tool_call_arr = tool_call_arr
            return delta

-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")

--- a/vllm/executor/multiproc_worker_utils.py
+++ b/vllm/executor/multiproc_worker_utils.py
@@ -3,7 +3,6 @@ import multiprocessing
 import os
 import sys
 import threading
-import traceback
 import uuid
 from dataclasses import dataclass
 from multiprocessing import Queue
@@ -227,10 +226,9 @@ def _run_worker_process(
            except KeyboardInterrupt:
                break
            except BaseException as e:
-                tb = traceback.format_exc()
-                logger.error(
-                    "Exception in worker %s while processing method %s: %s, %s",
-                    process_name, method, e, tb)
+                logger.exception(
+                    "Exception in worker %s while processing method %s.",
+                    process_name, method)
                exception = e
            result_queue.put(
                Result(task_id=task_id, value=output, exception=exception))

--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -499,8 +499,8 @@ def kv_cache_scales_loader(
        logger.error("File or directory '%s' not found.", filename)
    except json.JSONDecodeError:
        logger.error("Error decoding JSON in file '%s'.", filename)
-    except Exception as e:
-        logger.error("An error occurred while reading '%s': %s", filename, e)
+    except Exception:
+        logger.exception("An error occurred while reading '%s'.", filename)
    # This section is reached if and only if any of the excepts are hit
    # Return an empty iterable (list) => no KV cache scales are loaded
    # which ultimately defaults to 1.0 scales

--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -137,10 +137,9 @@ class CudaPlatform(Platform):
                            pynvml.NVML_P2P_CAPS_INDEX_NVLINK)
                        if p2p_status != pynvml.NVML_P2P_STATUS_OK:
                            return False
-                    except pynvml.NVMLError as error:
-                        logger.error(
+                    except pynvml.NVMLError:
+                        logger.exception(
                            "NVLink detection failed. This is normal if your"
-                            " machine has no NVLink equipped.",
-                            exc_info=error)
+                            " machine has no NVLink equipped.")
                        return False
        return True