feat: Add standardized Dynamo error types as Python exceptions (#7136)

8ff0b6e7 · Tanmay Verma · GitHub · a2077c96 · 8ff0b6e7 · 8ff0b6e7
Unverified Commit 8ff0b6e7 authored Mar 31, 2026 by Tanmay Verma Committed by GitHub Mar 31, 2026
14 changed files
--- a/components/src/dynamo/sglang/request_handlers/handler_base.py
+++ b/components/src/dynamo/sglang/request_handlers/handler_base.py
@@ -23,6 +23,7 @@ import sglang as sgl
 from dynamo._core import Context
 from dynamo.common.utils.input_params import InputParamManager
 from dynamo.llm import KvEventPublisher, WorkerMetricsPublisher
+from dynamo.llm.exceptions import EngineShutdown
 from dynamo.runtime import DistributedRuntime
 from dynamo.sglang._compat import NetworkAddress, get_local_ip_auto
 from dynamo.sglang.args import Config
@@ -541,7 +542,7 @@ class BaseWorkerHandler(BaseGenerativeHandler[RequestT, ResponseT]):
            context: Context object for cancellation handling.
        Raises:
-            GeneratorExit: If shutdown event was triggered.
+            EngineShutdown: If shutdown event was triggered.
        """
        try:
            logging.debug(f"Cancellation monitor started for Context: {context.id()}")
@@ -600,9 +601,9 @@ class BaseWorkerHandler(BaseGenerativeHandler[RequestT, ResponseT]):
                    f"SGLang tokenizer_manager not found for abort request: {context.id()}"
                )
-            # Check which event triggered and raise GeneratorExit if shutdown
+            # Check which event triggered and raise EngineShutdown if shutdown
            if shutdown_task and shutdown_task in done:
-                raise GeneratorExit("Engine was shut down during token generation")
+                raise EngineShutdown("Engine was shut down during token generation")
        except asyncio.CancelledError:
            # Task was cancelled, which is expected when generation completes
@@ -626,7 +627,7 @@ class BaseWorkerHandler(BaseGenerativeHandler[RequestT, ResponseT]):
        Automatically creates a background task to monitor for cancellation and
        shutdown events, cleaning it up when the context exits.
-        If shutdown event was triggered, raises GeneratorExit on exit.
+        If shutdown event was triggered, raises EngineShutdown on exit.
        Args:
            request_id_future: Future that will be set with the SGLang request ID

--- a/components/src/dynamo/trtllm/request_handlers/handler_base.py
+++ b/components/src/dynamo/trtllm/request_handlers/handler_base.py
@@ -33,6 +33,7 @@ from tensorrt_llm.scheduling_params import SchedulingParams
 from dynamo._core import Client, Context
 from dynamo.common.utils.otel_tracing import build_trace_headers
+from dynamo.llm.exceptions import EngineShutdown
 from dynamo.logits_processing.examples import HelloWorldLogitsProcessor
 from dynamo.nixl_connect import Connector
 from dynamo.runtime import DistributedRuntime
@@ -202,7 +203,7 @@ class HandlerBase(BaseGenerativeHandler):
        Background task to trigger cancellation if request is cancelled or shutdown
        event is set.
-        Raise GeneratorExit if shutdown event is triggered.
+        Raise EngineShutdown if shutdown event is triggered.
        """
        try:
            cancellation_triggers: list[asyncio.Future[Any]] = [
@@ -238,9 +239,9 @@ class HandlerBase(BaseGenerativeHandler):
                except asyncio.CancelledError:
                    pass
-            # Raise GeneratorExit if cancellation is due to shutdown event triggered
+            # Raise EngineShutdown if cancellation is due to shutdown event triggered
            if shutdown_task in done:
-                raise GeneratorExit("Engine was shut down during generation.")
+                raise EngineShutdown("Engine was shut down during generation.")
        except asyncio.CancelledError:
            # Task was cancelled, which is expected when generation completes normally
@@ -254,7 +255,7 @@ class HandlerBase(BaseGenerativeHandler):
        Monitor for cancellation triggers and cancel by calling
        generation_result.abort().
-        Raise GeneratorExit if shutdown event is triggered.
+        Raise EngineShutdown if shutdown event is triggered.
        Yields:
            asyncio.Task: The cancellation monitoring task
@@ -968,7 +969,11 @@ class HandlerBase(BaseGenerativeHandler):
                "token_ids": [],
            }
-        # 3. ALL OTHER ERRORS - graceful shutdown
+        # 3. EngineShutdown - let it propagate to the Rust bridge
+        except EngineShutdown:
+            raise
+        # 4. ALL OTHER ERRORS - graceful shutdown
        except Exception as e:
            error_type = type(e).__name__
            error_msg = str(e)

--- a/components/src/dynamo/vllm/handlers.py
+++ b/components/src/dynamo/vllm/handlers.py
@@ -46,6 +46,7 @@ from dynamo.llm import (
    register_model,
    unregister_model,
 )
+from dynamo.llm.exceptions import EngineShutdown
 from dynamo.runtime import Client
 from dynamo.runtime.logging import configure_dynamo_logging
@@ -610,7 +611,7 @@ class BaseWorkerHandler(ABC, Generic[RequestT, ResponseT]):
    async def _monitor_abort(self, context, request_id, is_prefill):
        """
        Background task that monitors for context cancellation and shutdown.
-        Aborts the request if either occurs. Raises GeneratorExit if shutdown was triggered.
+        Aborts the request if either occurs. Raises EngineShutdown if shutdown was triggered.
        """
        try:
            # Build list of futures/tasks to wait for
@@ -642,13 +643,15 @@ class BaseWorkerHandler(ABC, Generic[RequestT, ResponseT]):
                f"Aborted {'Prefill ' if is_prefill else ''}Request ID: {request_id}"
            )
-            # Check which event triggered and raise GeneratorExit if shutdown
+            # Check which event triggered and raise EngineShutdown if shutdown
            if shutdown_task and shutdown_task in done:
-                raise GeneratorExit("Engine was shut down during generation.")
+                raise EngineShutdown("Engine was shut down during generation.")
        except asyncio.CancelledError:
            # Task was cancelled, normal cleanup if not aborted
            pass
+        except EngineShutdown:
+            raise
        except Exception as e:
            logger.error(f"Error in abort monitor for request {request_id}: {e}")
@@ -656,7 +659,7 @@ class BaseWorkerHandler(ABC, Generic[RequestT, ResponseT]):
    async def _abort_monitor(self, context, request_id, is_prefill=False):
        """
        Context manager that creates and automatically cleans up an abort monitoring task.
-        If shutdown event was triggered, raises GeneratorExit on exit.
+        If shutdown event was triggered, raises EngineShutdown on exit.
        """
        task = asyncio.create_task(self._monitor_abort(context, request_id, is_prefill))
        try:
@@ -670,7 +673,7 @@ class BaseWorkerHandler(ABC, Generic[RequestT, ResponseT]):
                except asyncio.CancelledError:
                    pass
            else:
-                # If the task completed, check if it raised GeneratorExit
+                # If the task completed, check if it raised EngineShutdown
                task.result()
    async def clear_kv_blocks(self, request=None):

--- a/components/src/dynamo/vllm/omni/omni_handler.py
+++ b/components/src/dynamo/vllm/omni/omni_handler.py
@@ -35,6 +35,7 @@ from dynamo.common.utils.video_utils import (
    normalize_video_frames,
    parse_size,
 )
+from dynamo.llm.exceptions import EngineShutdown
 from dynamo.vllm.omni.base_handler import BaseOmniHandler
 logger = logging.getLogger(__name__)
@@ -206,7 +207,7 @@ class OmniHandler(BaseOmniHandler):
                        if chunk:
                            yield chunk
-            except GeneratorExit:
+            except EngineShutdown:
                logger.info(f"Request {request_id} aborted due to shutdown")
                raise
            except Exception as e:

--- a/docs/development/backend-guide.md
+++ b/docs/development/backend-guide.md
@@ -115,14 +115,13 @@ In the P/D disaggregated setup you would have `deepseek-distill-llama8b.prefill.
 A Python worker may need to be shut down promptly, for example when the node running the worker is to be reclaimed and there isn't enough time to complete all ongoing requests before the shutdown deadline.
-In such cases, you can signal incomplete responses by raising a `GeneratorExit` exception in your generate loop. This will immediately close the response stream, signaling to the frontend that the stream is incomplete. With request migration enabled (see the [`migration_limit`](../fault-tolerance/request-migration.md) parameter), the frontend will automatically migrate the partially completed request to another worker instance, if available, to be completed.
+In such cases, you can signal incomplete responses by raising an `EngineShutdown` exception in your generate loop. This will immediately close the response stream, signaling to the frontend that the stream is incomplete. With request migration enabled (see the [`migration_limit`](../fault-tolerance/request-migration.md) parameter), the frontend will automatically migrate the partially completed request to another worker instance, if available, to be completed.
-> [!WARNING]
-> We will update the `GeneratorExit` exception to a new Dynamo exception. Please expect minor code breaking change in the near future.
 Here's an example of how to implement this in your `RequestHandler`:
 ```python
+from dynamo.llm.exceptions import EngineShutdown
 class RequestHandler:
    async def generate(self, request):
@@ -130,13 +129,13 @@ class RequestHandler:
        for result in self.engine.generate_streaming(request):
            # Check if we need to migrate before yielding each token
            if is_shutting_down():
-                # Raising GeneratorExit closes the stream and triggers migration
+                # Raising EngineShutdown closes the stream and triggers migration
-                raise GeneratorExit("Worker shutting down, migrating request")
+                raise EngineShutdown("Worker shutting down, migrating request")
            yield result
 ```
-When `GeneratorExit` is raised, the frontend receives the incomplete response and can seamlessly continue generation on another available worker instance, preserving the user experience even during worker shutdowns.
+When `EngineShutdown` is raised, the frontend receives the incomplete response and can seamlessly continue generation on another available worker instance, preserving the user experience even during worker shutdowns.
 For more information about how request migration works, see the [Request Migration Architecture](../fault-tolerance/request-migration.md) documentation.

--- a/lib/bindings/python/rust/engine.rs
+++ b/lib/bindings/python/rust/engine.rs
@@ -22,6 +22,7 @@ pub use dynamo_runtime::{
 };
 use super::context::{Context, callable_accepts_kwarg};
+use super::errors::py_exception_to_backend_error;
 /// Add bindings from this crate to the provided module
 pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
@@ -129,17 +130,14 @@ impl PythonServerStreamingEngine {
 #[derive(Debug, thiserror::Error)]
 enum ResponseProcessingError {
-    #[error("python exception: {0}")]
+    #[error("dynamo error")]
-    PythonException(String),
+    Dynamo(DynamoError),
-    #[error("python generator exit: {0}")]
-    PyGeneratorExit(String),
    #[error("deserialize error: {0}")]
-    DeserializeError(String),
+    Deserialize(String),
    #[error("gil offload error: {0}")]
-    OffloadError(String),
+    Offload(String),
 }
 #[async_trait::async_trait]
@@ -239,7 +237,7 @@ where
                        done = true;
                        match e {
-                            ResponseProcessingError::DeserializeError(e) => {
+                            ResponseProcessingError::Deserialize(e) => {
                                // tell the python async generator to stop generating
                                // right now, this is impossible as we are not passing the context to the python async generator
                                // todo: add task-local context to the python async generator
@@ -249,24 +247,13 @@ where
                                    e
                                ))
                            }
-                            ResponseProcessingError::PyGeneratorExit(_) => Annotated::from_err(
+                            ResponseProcessingError::Dynamo(dynamo_err) => {
-                                DynamoError::builder()
+                                Annotated::from_err(dynamo_err)
-                                    .error_type(ErrorType::Backend(BackendError::EngineShutdown))
-                                    .message("engine shutting down")
-                                    .build(),
-                            ),
-                            ResponseProcessingError::PythonException(e) => {
-                                Annotated::from_error(format!(
-                                    "a python exception was caught while processing the async generator: {}",
-                                    e
-                                ))
                            }
-                            ResponseProcessingError::OffloadError(e) => {
+                            ResponseProcessingError::Offload(e) => Annotated::from_error(format!(
-                                Annotated::from_error(format!(
                                "critical error: failed to offload the python async generator to a new thread: {}",
                                e
-                                ))
+                            )),
-                            }
                        }
                    }
                };
@@ -307,24 +294,67 @@ where
    Resp: Data + for<'de> Deserialize<'de>,
 {
    let item = item.map_err(|e| {
-        println!();
-        let mut is_py_generator_exit = false;
        Python::with_gil(|py| {
            e.display(py);
-            is_py_generator_exit = e.is_instance_of::<pyo3::exceptions::PyGeneratorExit>(py);
-        });
+            // Check if the Python exception is a Dynamo error type.
-        if is_py_generator_exit {
+            // Wrap as Backend* since this is the backend engine context.
-            ResponseProcessingError::PyGeneratorExit(e.to_string())
+            if let Some((backend_err, message)) = py_exception_to_backend_error(py, &e) {
-        } else {
+                return ResponseProcessingError::Dynamo(
-            ResponseProcessingError::PythonException(e.to_string())
+                    DynamoError::builder()
+                        .error_type(ErrorType::Backend(backend_err))
+                        .message(message)
+                        .build(),
+                );
+            }
+            // GeneratorExit from Python's generator protocol (e.g., GC closing
+            // a generator) is treated as an engine shutdown.
+            if e.is_instance_of::<pyo3::exceptions::PyGeneratorExit>(py) {
+                return ResponseProcessingError::Dynamo(
+                    DynamoError::builder()
+                        .error_type(ErrorType::Backend(BackendError::EngineShutdown))
+                        .message("engine shutting down")
+                        .build(),
+                );
            }
+            // Map well-known Python exceptions to specific Backend error types.
+            // Order matters: check subclasses before their parents
+            // (e.g., ConnectionRefusedError before ConnectionError).
+            let backend_err = if e.is_instance_of::<pyo3::exceptions::PyValueError>(py)
+                || e.is_instance_of::<pyo3::exceptions::PyTypeError>(py)
+            {
+                BackendError::InvalidArgument
+            } else if e.is_instance_of::<pyo3::exceptions::PyTimeoutError>(py) {
+                BackendError::ConnectionTimeout
+            } else if e.is_instance_of::<pyo3::exceptions::PyConnectionRefusedError>(py) {
+                BackendError::CannotConnect
+            } else if e.is_instance_of::<pyo3::exceptions::PyConnectionResetError>(py)
+                || e.is_instance_of::<pyo3::exceptions::PyBrokenPipeError>(py)
+                || e.is_instance_of::<pyo3::exceptions::PyConnectionError>(py)
+            {
+                BackendError::Disconnected
+            } else if e.is_instance_of::<pyo3::exceptions::asyncio::CancelledError>(py) {
+                BackendError::Cancelled
+            } else {
+                BackendError::Unknown
+            };
+            ResponseProcessingError::Dynamo(
+                DynamoError::builder()
+                    .error_type(ErrorType::Backend(backend_err))
+                    .message(e.to_string())
+                    .build(),
+            )
+        })
    })?;
    let response = tokio::task::spawn_blocking(move || {
        Python::with_gil(|py| depythonize::<Resp>(&item.into_bound(py)))
    })
    .await
-    .map_err(|e| ResponseProcessingError::OffloadError(e.to_string()))?
+    .map_err(|e| ResponseProcessingError::Offload(e.to_string()))?
-    .map_err(|e| ResponseProcessingError::DeserializeError(e.to_string()))?;
+    .map_err(|e| ResponseProcessingError::Deserialize(e.to_string()))?;
    let response = Annotated::from_data(response);

--- a/lib/bindings/python/rust/errors.rs
+++ b/lib/bindings/python/rust/errors.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! Python exception types mirroring Dynamo's [`ErrorType`] enum.
+//!
+//! The [`define_dynamo_exceptions!`] macro auto-generates a Python exception class
+//! for each Dynamo error variant, a conversion function from Python exceptions back
+//! to a [`BackendError`] variant plus message (the caller wraps these into a
+//! `DynamoError`), and a registration function for the `_core` module.
+//!
+//! When new variants are added to [`ErrorType`] or [`BackendError`], add a
+//! corresponding entry to the macro invocation below to keep Python exceptions
+//! in sync.
+use dynamo_runtime::error::BackendError;
+use pyo3::prelude::*;
+use pyo3::types::PyModule;
+// Base exception for all Dynamo errors.
+pyo3::create_exception!(dynamo._core, DynamoException, pyo3::exceptions::PyException);
+/// Defines Python exception classes for each Dynamo error type.
+///
+/// For each `(RustExceptionName, BackendError)` pair, the macro:
+/// 1. Creates a Python exception class inheriting from `DynamoException`
+/// 2. Adds it to `py_exception_to_backend_error()` for Python → `BackendError` extraction
+/// 3. Adds it to `register_exceptions()` for module registration
+///
+/// The conversion intentionally returns a `BackendError` variant and message
+/// rather than a fully constructed `DynamoError`. This lets the caller decide
+/// how to wrap it — backend contexts use `ErrorType::Backend(...)`, while
+/// other contexts could map to top-level `ErrorType` variants.
+macro_rules! define_dynamo_exceptions {
+    ( $( ($name:ident, $backend_error:expr) ),* $(,)? ) => {
+        $(
+            pyo3::create_exception!(dynamo._core, $name, DynamoException);
+        )*
+        /// Extract a [`BackendError`] variant from a Python exception if it is
+        /// a known Dynamo exception.
+        ///
+        /// Returns `Some((BackendError, message))` if the exception is a Dynamo
+        /// exception, `None` otherwise. The caller decides how to wrap the
+        /// `BackendError` into an `ErrorType`.
+        pub fn py_exception_to_backend_error(
+            py: Python<'_>,
+            err: &PyErr,
+        ) -> Option<(BackendError, String)> {
+            // Check specific subtypes first (most-specific match wins).
+            $(
+                if err.is_instance_of::<$name>(py) {
+                    let message = err
+                        .value(py)
+                        .str()
+                        .map(|s| s.to_string_lossy().into_owned())
+                        .unwrap_or_default();
+                    return Some(($backend_error, message));
+                }
+            )*
+            // Fall back: check if it's a bare DynamoException (Unknown).
+            if err.is_instance_of::<DynamoException>(py) {
+                let message = err
+                    .value(py)
+                    .str()
+                    .map(|s| s.to_string_lossy().into_owned())
+                    .unwrap_or_default();
+                return Some((BackendError::Unknown, message));
+            }
+            None
+        }
+        /// Register all Dynamo exception classes on the `_core` module.
+        pub fn register_exceptions(m: &Bound<'_, PyModule>) -> PyResult<()> {
+            m.add("DynamoException", m.py().get_type::<DynamoException>())?;
+            $(
+                m.add(stringify!($name), m.py().get_type::<$name>())?;
+            )*
+            Ok(())
+        }
+    };
+}
+// ---------------------------------------------------------------------------
+// Exception definitions — one entry per BackendError variant.
+//
+// All error types are exposed to Python as exception classes. When raised by
+// Python backend code, they are interpreted as Backend* errors in Rust
+// (e.g., raising `InvalidArgument` in Python becomes `BackendInvalidArgument`
+// on the Rust side).
+//
+// When a new variant is added to BackendError in error.rs, add a
+// corresponding line here so that a Python exception is generated.
+// ---------------------------------------------------------------------------
+define_dynamo_exceptions!(
+    (Unknown, BackendError::Unknown),
+    (InvalidArgument, BackendError::InvalidArgument),
+    (CannotConnect, BackendError::CannotConnect),
+    (Disconnected, BackendError::Disconnected),
+    (ConnectionTimeout, BackendError::ConnectionTimeout),
+    (Cancelled, BackendError::Cancelled),
+    (EngineShutdown, BackendError::EngineShutdown),
+    (StreamIncomplete, BackendError::StreamIncomplete),
+);
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -67,6 +67,7 @@ impl From<RouterMode> for RsRouterMode {
 mod context;
 mod engine;
+pub mod errors;
 mod http;
 mod kserve_grpc;
 mod llm;
@@ -196,6 +197,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<planner::PlannerDecision>()?;
    engine::add_to_module(m)?;
+    errors::register_exceptions(m)?;
    parsers::add_to_module(m)?;
    m.add_class::<prometheus_metrics::RuntimeMetrics>()?;

--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -1980,3 +1980,58 @@ class VirtualConnectorClient:
    async def wait(self) -> None:
        """Blocks until there is a new decision to fetch using 'get'"""
        ...
+# =============================================================================
+# Dynamo Exception Types
+#
+# Standardized exceptions for Dynamo error categories. All inherit from
+# DynamoException. The Rust error type mapping depends on the context in
+# which the exception is raised (e.g., in a backend context, raising
+# `Cancelled` is reported as the Rust error type `BackendCancelled`).
+# =============================================================================
+class DynamoException(Exception):
+    """Base exception for all Dynamo error types."""
+    ...
+class Unknown(DynamoException):
+    """Uncategorized or unknown error."""
+    ...
+class InvalidArgument(DynamoException):
+    """Invalid input (e.g., prompt exceeds context length)."""
+    ...
+class CannotConnect(DynamoException):
+    """Failed to establish a connection."""
+    ...
+class Disconnected(DynamoException):
+    """An established connection was lost."""
+    ...
+class ConnectionTimeout(DynamoException):
+    """A connection or request timed out."""
+    ...
+class Cancelled(DynamoException):
+    """The request was cancelled."""
+    ...
+class EngineShutdown(DynamoException):
+    """The engine process has shut down or crashed."""
+    ...
+class StreamIncomplete(DynamoException):
+    """The response stream was terminated before completion."""
+    ...
--- a/lib/bindings/python/src/dynamo/llm/exceptions.py
+++ b/lib/bindings/python/src/dynamo/llm/exceptions.py
@@ -5,6 +5,16 @@
 import logging
+from dynamo._core import Cancelled as Cancelled
+from dynamo._core import CannotConnect as CannotConnect
+from dynamo._core import ConnectionTimeout as ConnectionTimeout
+from dynamo._core import Disconnected as Disconnected
+from dynamo._core import DynamoException as DynamoException
+from dynamo._core import EngineShutdown as EngineShutdown
+from dynamo._core import InvalidArgument as InvalidArgument
+from dynamo._core import StreamIncomplete as StreamIncomplete
+from dynamo._core import Unknown as Unknown
 logger = logging.getLogger(__name__)
 _MAX_MESSAGE_LENGTH = 8192

--- a/lib/bindings/python/tests/cancellation/test_cancellation.py
+++ b/lib/bindings/python/tests/cancellation/test_cancellation.py
@@ -278,9 +278,8 @@ async def test_server_raise_cancelled(temp_file_store, server, client):
    except ValueError as e:
        # Verify the expected cancellation exception is received
         # TODO: Should this be an asyncio.CancelledError?
-        assert str(e).endswith(
+        assert "CancelledError" in str(e)
-            "a python exception was caught while processing the async generator: CancelledError: "
+        assert "BackendCancelled" in str(e)
-        )
    # Verify server context cancellation status
    # TODO: Server to gracefully stop the stream?

--- a/lib/bindings/python/tests/test_http_server.py
+++ b/lib/bindings/python/tests/test_http_server.py
@@ -190,4 +190,4 @@ async def test_chat_completion_http_error(http_server, msg_to_code: tuple[str, i
            if msg_to_code[0] == MSG_CONTAINS_ERROR:
                assert MSG_CONTAINS_ERROR in str(error_json)
            elif msg_to_code[0] == MSG_CONTAINS_INTERNAL_ERROR:
-                assert "a python exception was caught" in str(error_json).lower()
+                assert "simulated internal error" in str(error_json).lower()
--- a/lib/llm/src/migration.rs
+++ b/lib/llm/src/migration.rs
@@ -27,9 +27,7 @@ fn is_migratable(err: &(dyn StdError + 'static)) -> bool {
        ErrorType::ConnectionTimeout,
        ErrorType::Backend(BackendError::EngineShutdown),
    ];
-    const NON_MIGRATABLE: &[ErrorType] = &[
+    const NON_MIGRATABLE: &[ErrorType] = &[ErrorType::Cancelled];
-        // Future: ErrorType::Cancelled, ErrorType::ValidationError, etc.
-    ];
    error::match_error_chain(err, MIGRATABLE, NON_MIGRATABLE)
 }

--- a/lib/runtime/src/error.rs
+++ b/lib/runtime/src/error.rs
@@ -51,6 +51,8 @@ pub enum ErrorType {
    Disconnected,
    /// A connection or request timed out.
    ConnectionTimeout,
+    /// The request was cancelled (e.g., client disconnected).
+    Cancelled,
    /// Error originating from a backend engine.
    Backend(BackendError),
 }
@@ -63,7 +65,8 @@ impl fmt::Display for ErrorType {
            ErrorType::CannotConnect => write!(f, "CannotConnect"),
            ErrorType::Disconnected => write!(f, "Disconnected"),
            ErrorType::ConnectionTimeout => write!(f, "ConnectionTimeout"),
-            ErrorType::Backend(sub) => write!(f, "Backend.{sub}"),
+            ErrorType::Cancelled => write!(f, "Cancelled"),
+            ErrorType::Backend(sub) => write!(f, "Backend{sub}"),
        }
    }
 }
@@ -75,14 +78,35 @@ impl fmt::Display for ErrorType {
 /// Backend engine error subcategories.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub enum BackendError {
+    /// Uncategorized or unknown backend error.
+    Unknown,
+    /// The request contains invalid input (e.g., prompt exceeds context length).
+    InvalidArgument,
+    /// Failed to establish a connection to a remote worker.
+    CannotConnect,
+    /// An established connection was lost unexpectedly.
+    Disconnected,
+    /// A connection or request timed out.
+    ConnectionTimeout,
+    /// The request was cancelled (e.g., client disconnected).
+    Cancelled,
    /// The engine process has shut down or crashed.
    EngineShutdown,
+    /// The response stream was terminated before completion (e.g., engine dropped mid-stream).
+    StreamIncomplete,
 }
 impl fmt::Display for BackendError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
+            BackendError::Unknown => write!(f, "Unknown"),
+            BackendError::InvalidArgument => write!(f, "InvalidArgument"),
+            BackendError::CannotConnect => write!(f, "CannotConnect"),
+            BackendError::Disconnected => write!(f, "Disconnected"),
+            BackendError::ConnectionTimeout => write!(f, "ConnectionTimeout"),
+            BackendError::Cancelled => write!(f, "Cancelled"),
            BackendError::EngineShutdown => write!(f, "EngineShutdown"),
+            BackendError::StreamIncomplete => write!(f, "StreamIncomplete"),
        }
    }
 }
@@ -427,5 +451,45 @@ mod tests {
    #[test]
    fn test_error_type_display() {
        assert_eq!(ErrorType::Unknown.to_string(), "Unknown");
+        assert_eq!(ErrorType::InvalidArgument.to_string(), "InvalidArgument");
+        assert_eq!(ErrorType::CannotConnect.to_string(), "CannotConnect");
+        assert_eq!(ErrorType::Disconnected.to_string(), "Disconnected");
+        assert_eq!(
+            ErrorType::ConnectionTimeout.to_string(),
+            "ConnectionTimeout"
+        );
+        assert_eq!(ErrorType::Cancelled.to_string(), "Cancelled");
+        assert_eq!(
+            ErrorType::Backend(BackendError::Unknown).to_string(),
+            "BackendUnknown"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::InvalidArgument).to_string(),
+            "BackendInvalidArgument"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::CannotConnect).to_string(),
+            "BackendCannotConnect"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::Disconnected).to_string(),
+            "BackendDisconnected"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::ConnectionTimeout).to_string(),
+            "BackendConnectionTimeout"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::Cancelled).to_string(),
+            "BackendCancelled"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::EngineShutdown).to_string(),
+            "BackendEngineShutdown"
+        );
+        assert_eq!(
+            ErrorType::Backend(BackendError::StreamIncomplete).to_string(),
+            "BackendStreamIncomplete"
+        );
    }
 }