first

2a934cec · raojy · 4b618aa3 · 2a934cec · 2a934cec · 2a934cec
Commit 2a934cec authored May 25, 2026 by raojy
20 changed files
--- a/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/anthropic_adapter.py
+++ b/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/anthropic_adapter.py
+"""Anthropic Messages API adapter for VLM (async only, vision support).
+Supports Anthropic's /v1/messages endpoint with image inputs.
+Usage:
+    from vlm.anthropic_adapter import AnthropicVlmAdapter
+    adapter = AnthropicVlmAdapter(
+        endpoint_url="https://api.anthropic.com/v1/messages",
+        api_key="sk-ant-xxx",
+        model="claude-sonnet-4-6",
+    )
+    result = await adapter.vision_completion(
+        user_prompt="Describe this image",
+        images=["path/to/image.png"],
+        system_prompt="You are a helpful assistant.",
+    )
+"""
+from __future__ import annotations
+import logging
+from typing import Any
+import httpx
+from .utils import image_to_base64
+from .vlm_adapter import VlmAdapter
+logger = logging.getLogger(__name__)
+# Default request timeout, in seconds, for the internally created HTTP client.
+DEFAULT_REQUEST_TIMEOUT = 150.0
+# Default ``max_tokens`` value placed in the request payload.
+DEFAULT_MAX_TOKENS = 4096
+class AnthropicVlmAdapter(VlmAdapter):
+    """VLM adapter for Anthropic Messages API.
+    Features:
+    * Vision support via ``image`` content blocks (base64 encoded).
+    * Shared or internally-created :class:`httpx.AsyncClient` for connection
+      pooling.
+    * Model name can be overridden per-call or at initialization.
+    This adapter is intentionally generic. No preset base_url, model, or system prompt.
+    All required parameters must be provided by the caller.
+    """
+    def __init__(
+        self,
+        endpoint_url: str,
+        api_key: str,
+        model: str,
+        *,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
+        timeout: float = DEFAULT_REQUEST_TIMEOUT,
+        async_client: httpx.AsyncClient | None = None,
+    ) -> None:
+        """Initialize the Anthropic Messages VLM adapter.
+        Args:
+            endpoint_url: Full ``/v1/messages`` endpoint URL
+                (e.g. ``https://api.anthropic.com/v1/messages``).
+            api_key: Bearer token for the ``Authorization`` header.
+            model: Default model name sent in the request payload.
+            max_tokens: Maximum tokens to generate. Defaults to 4096.
+            timeout: Request timeout in seconds. Defaults to 150.
+            async_client (httpx.AsyncClient | None, optional):
+                Shared HTTP client supplied by the caller. When
+                provided the adapter reuses it and will *not* close it in
+                :meth:`aclose`. Defaults to None.
+        """
+        self._url = endpoint_url
+        self._api_key = api_key
+        self._default_model = model
+        self._max_tokens = max_tokens
+        self._timeout = timeout
+        self._external_client = async_client
+        self._client: httpx.AsyncClient | None = async_client
+        logger.info(
+            "AnthropicVlmAdapter: endpoint=%s model=%s max_tokens=%s",
+            self._url,
+            self._default_model,
+            self._max_tokens,
+        )
+    def _get_client(self) -> httpx.AsyncClient:
+        """Return the async HTTP client, creating it lazily if needed."""
+        if self._client is None:
+            self._client = httpx.AsyncClient(timeout=self._timeout)
+        return self._client
+    def _build_content_blocks(
+        self,
+        user_prompt: str,
+        images: list[str | bytes],
+    ) -> list[dict[str, Any]]:
+        """Build Anthropic-style content blocks with text and image blocks.
+        Args:
+            user_prompt: The text instruction.
+            images: Images to include in the user turn.
+        Returns:
+            list[dict[str, Any]]: Anthropic-style content blocks.
+        """
+        blocks: list[dict[str, Any]] = [{"type": "text", "text": user_prompt}]
+        for img in images:
+            mime, b64 = image_to_base64(img)
+            blocks.append(
+                {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": mime,
+                        "data": b64,
+                    },
+                }
+            )
+        return blocks
+    def _build_payload(
+        self,
+        user_prompt: str,
+        images: list[str | bytes],
+        system_prompt: str,
+        model: str | None,
+    ) -> dict[str, Any]:
+        """Assemble the full JSON request payload for Anthropic Messages API.
+        Args:
+            user_prompt: User-facing text instruction.
+            images: Images for the user turn.
+            system_prompt: System instruction (may be empty).
+            model: Model name to use (overrides default if provided).
+        Returns:
+            dict[str, Any]: JSON-serialisable request body.
+        """
+        messages: list[dict[str, Any]] = []
+        if system_prompt:
+            messages.append({"role": "user", "content": system_prompt})
+        messages.append(
+            {
+                "role": "user",
+                "content": self._build_content_blocks(user_prompt, images),
+            }
+        )
+        payload: dict[str, Any] = {
+            "model": model or self._default_model,
+            "messages": messages,
+            "max_tokens": self._max_tokens,
+        }
+        return payload
+    @staticmethod
+    def _parse_response(data: dict[str, Any]) -> str:
+        """Extract the assistant message text from Anthropic Messages response.
+        Handles responses with or without content blocks, and extracts text
+        from content blocks when available.
+        Args:
+            data: Parsed JSON response body.
+        Returns:
+            str: Assistant text content.
+        Raises:
+            RuntimeError: If the response contains no extractable content.
+        """
+        content = data.get("content", [])
+        if content:
+            for block in content:
+                if isinstance(block, dict) and block.get("type") == "text":
+                    return block.get("text", "")
+        thinking = data.get("thinking")
+        if thinking:
+            return f"[Think] {thinking}"
+        raise RuntimeError("Anthropic Messages response has no extractable content.")
+    async def vision_completion(
+        self,
+        user_prompt: str,
+        images: list[str | bytes],
+        system_prompt: str = "",
+        model: str | None = None,
+    ) -> str:
+        """Call the ``/v1/messages`` endpoint with vision content.
+        Args:
+            user_prompt: User-facing text instruction.
+            images: Images to include in the user turn.
+            system_prompt: System-level instruction. Defaults to ''.
+            model: Model name to use. Defaults to the model set at init.
+        Returns:
+            str: Assistant message text extracted from the API response.
+        Raises:
+            httpx.HTTPStatusError: On non-2xx HTTP responses.
+            RuntimeError: If the response contains no content.
+        """
+        payload = self._build_payload(user_prompt, images, system_prompt, model)
+        headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+            "x-api-key": self._api_key,
+        }
+        resp = await self._get_client().post(self._url, json=payload, headers=headers)
+        resp.raise_for_status()
+        return self._parse_response(resp.json())
+    async def aclose(self) -> None:
+        """Close the internal async HTTP client if we own it.
+        Has no effect when the client was injected from outside.
+        """
+        if self._external_client is None and self._client is not None:
+            await self._client.aclose()
+            self._client = None
--- a/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/chat_completions_adapter.py
+++ b/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/chat_completions_adapter.py
+"""OpenAI-compatible chat/completions VLM adapter (async only).
+Supports any backend that follows the standard ``POST /chat/completions``
+request/response schema with vision support (image_url content blocks).
+Usage:
+    from vlm.chat_completions_adapter import ChatCompletionsVlmAdapter
+    adapter = ChatCompletionsVlmAdapter(
+        endpoint_url="https://api.openai.com/v1/chat/completions",
+        api_key="sk-xxx",
+        model="gpt-4o",
+    )
+    result = await adapter.vision_completion(
+        user_prompt="Describe this image",
+        images=["path/to/image.png"],
+        system_prompt="You are a helpful assistant.",
+    )
+"""
+from __future__ import annotations
+import logging
+from typing import Any
+import httpx
+from .utils import image_to_data_url
+from .vlm_adapter import VlmAdapter
+logger = logging.getLogger(__name__)
+# Default request timeout, in seconds, for the internally created HTTP client.
+# NOTE(review): 10x the Anthropic adapter's 150.0 default; confirm this is intentional.
+DEFAULT_REQUEST_TIMEOUT = 1500.0
+class ChatCompletionsVlmAdapter(VlmAdapter):
+    """VLM adapter for any OpenAI-compatible ``/chat/completions`` endpoint.
+    Features:
+    * Multimodal ``image_url`` vision content (images encoded as data URLs).
+    * Optional ``reasoning_effort`` request field (Cloudsway extension).
+    * Shared or internally-created :class:`httpx.AsyncClient` for connection
+      pooling.
+    * Model name can be overridden per-call or at initialization.
+    This adapter is intentionally generic. No preset base_url, model, or system prompt.
+    All required parameters must be provided by the caller.
+    """
+    def __init__(
+        self,
+        endpoint_url: str,
+        api_key: str,
+        model: str,
+        *,
+        timeout: float = DEFAULT_REQUEST_TIMEOUT,
+        async_client: httpx.AsyncClient | None = None,
+        reasoning_effort: str | None = None,
+    ) -> None:
+        """Initialize the chat/completions VLM adapter.
+        Args:
+            endpoint_url: Full ``/chat/completions`` endpoint URL
+                (e.g. ``https://api.openai.com/v1/chat/completions``).
+            api_key: Bearer token for the ``Authorization`` header.
+            model: Default model name sent in the request payload.
+            timeout: Request timeout in seconds. Defaults to 1500.
+            async_client (httpx.AsyncClient | None, optional):
+                Shared HTTP client supplied by the caller. When
+                provided the adapter reuses it and will *not* close it in
+                :meth:`aclose`. Defaults to None.
+            reasoning_effort (str | None, optional):
+                Optional ``reasoning_effort`` field appended
+                to the JSON body (e.g. ``'high'``). Pass ``None`` or ``''``
+                to omit the field. Defaults to None.
+        """
+        self._url = endpoint_url
+        self._api_key = api_key
+        self._default_model = model
+        self._timeout = timeout
+        self._reasoning_effort = reasoning_effort or None
+        self._external_client = async_client
+        self._client: httpx.AsyncClient | None = async_client
+        logger.info(
+            "ChatCompletionsVlmAdapter: endpoint=%s model=%s reasoning_effort=%s",
+            self._url,
+            self._default_model,
+            self._reasoning_effort,
+        )
+    def _get_client(self) -> httpx.AsyncClient:
+        """Return the async HTTP client, creating it lazily if needed."""
+        if self._client is None:
+            self._client = httpx.AsyncClient(timeout=self._timeout)
+        return self._client
+    @staticmethod
+    def _build_user_content(
+        user_prompt: str,
+        images: list[str | bytes],
+    ) -> list[dict[str, Any]]:
+        """Build the ``user`` turn content list with text + image_url blocks.
+        Args:
+            user_prompt: The text instruction.
+            images: Images encoded as data URLs.
+        Returns:
+            list[dict[str, Any]]: OpenAI-style multimodal content blocks.
+        """
+        content: list[dict[str, Any]] = [{"type": "text", "text": user_prompt}]
+        content.extend(
+            {"type": "image_url", "image_url": {"url": image_to_data_url(img)}} for img in images
+        )
+        return content
+    def _build_payload(
+        self,
+        user_prompt: str,
+        images: list[str | bytes],
+        system_prompt: str,
+        model: str | None,
+    ) -> dict[str, Any]:
+        """Assemble the full JSON request payload for a vision call.
+        Args:
+            user_prompt: User-facing text instruction.
+            images: Images for the user turn.
+            system_prompt: System instruction (may be empty).
+            model: Model name to use (overrides default if provided).
+        Returns:
+            dict[str, Any]: JSON-serialisable request body.
+        """
+        messages: list[dict[str, Any]] = [
+            {
+                "role": "user",
+                "content": self._build_user_content(user_prompt, images),
+            },
+        ]
+        if system_prompt:
+            messages.insert(0, {"role": "system", "content": system_prompt})
+        payload: dict[str, Any] = {
+            "model": model or self._default_model,
+            "messages": messages,
+        }
+        if self._reasoning_effort:
+            payload["reasoning_effort"] = self._reasoning_effort
+        return payload
+    @staticmethod
+    def _parse_response(data: dict[str, Any]) -> str:
+        """Extract the assistant message text from a chat/completions response.
+        Handles both plain-string and list-of-content-blocks message formats.
+        Args:
+            data: Parsed JSON response body.
+        Returns:
+            str: Concatenated assistant text.
+        Raises:
+            RuntimeError: If the response contains no ``choices``.
+        """
+        choice = (data.get("choices") or [None])[0]
+        if not choice:
+            raise RuntimeError("chat/completions response has no choices.")
+        msg = choice.get("message", {})
+        content_val = msg.get("content")
+        if isinstance(content_val, str):
+            return content_val
+        if isinstance(content_val, list):
+            parts: list[str] = []
+            for block in content_val:
+                if isinstance(block, dict) and block.get("type") == "text":
+                    text = block.get("text")
+                    if isinstance(text, str):
+                        parts.append(text)
+            return "".join(parts)
+        return str(content_val or "")
+    async def vision_completion(
+        self,
+        user_prompt: str,
+        images: list[str | bytes],
+        system_prompt: str = "",
+        model: str | None = None,
+    ) -> str:
+        """Call the ``/chat/completions`` endpoint with vision content.
+        Args:
+            user_prompt: User-facing text instruction.
+            images: Images to include in the user turn.
+            system_prompt: System-level instruction. Defaults to ''.
+            model: Model name to use. Defaults to the model set at init.
+        Returns:
+            str: Assistant message text extracted from the API response.
+        Raises:
+            httpx.HTTPStatusError: On non-2xx HTTP responses.
+            RuntimeError: If the response contains no ``choices``.
+        """
+        payload = self._build_payload(user_prompt, images, system_prompt, model)
+        headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+        }
+        resp = await self._get_client().post(self._url, json=payload, headers=headers)
+        resp.raise_for_status()
+        return self._parse_response(resp.json())
+    async def aclose(self) -> None:
+        """Close the internal async HTTP client if we own it.
+        Has no effect when the client was injected from outside.
+        """
+        if self._external_client is None and self._client is not None:
+            await self._client.aclose()
+            self._client = None
--- a/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/utils.py
+++ b/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/utils.py
+"""Image encoding / decoding utilities for VLM."""
+from __future__ import annotations
+import base64
+import io
+from pathlib import Path
+from PIL import Image
+def read_image_bytes(image: str | bytes) -> bytes:
+    """Read raw image bytes from a path or return bytes unchanged.
+    Args:
+        image: File path to an image, or raw image bytes.
+    Returns:
+        bytes: Raw image bytes.
+    Raises:
+        FileNotFoundError: If image is a path and the file does not exist.
+    """
+    if isinstance(image, bytes):
+        return image
+    path = Path(image)
+    if not path.is_file():
+        raise FileNotFoundError(f"Image file not found: {image}")
+    return path.read_bytes()
+def detect_mime(data: bytes) -> str:
+    """Infer MIME type from image magic bytes.
+    Args:
+        data: Raw image bytes (at least 8 bytes for PNG check).
+    Returns:
+        str: 'image/png', 'image/jpeg', or 'image/png' as fallback.
+    """
+    if data[:8] == b"\x89PNG\r\n\x1a\n":
+        return "image/png"
+    if data[:3] == b"\xff\xd8\xff":
+        return "image/jpeg"
+    return "image/png"
+def detect_suffix(data: bytes) -> str:
+    """Infer file suffix from image magic bytes.
+    Args:
+        data: Raw image bytes.
+    Returns:
+        str: '.png', '.jpg', or '.bin' as fallback.
+    """
+    if data[:8] == b"\x89PNG\r\n\x1a\n":
+        return ".png"
+    if data[:3] == b"\xff\xd8\xff":
+        return ".jpg"
+    return ".bin"
+def image_to_mime_and_bytes(image: str | bytes) -> tuple[str, bytes]:
+    """Get MIME type and raw bytes; convert to PNG if format is not PNG/JPEG.
+    Args:
+        image: File path or raw image bytes.
+    Returns:
+        tuple[str, bytes]: (mime_type, raw_bytes). Unknown formats become PNG.
+    """
+    raw = read_image_bytes(image)
+    mime = detect_mime(raw)
+    if mime in ("image/png", "image/jpeg"):
+        return mime, raw
+    img = Image.open(io.BytesIO(raw)).convert("RGBA")
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return "image/png", buf.getvalue()
+def image_to_base64(image: str | bytes) -> tuple[str, str]:
+    """Encode image to MIME type and base64 string.
+    Args:
+        image: File path or raw image bytes.
+    Returns:
+        tuple[str, str]: (mime_type, base64_encoded_string).
+    """
+    mime, raw = image_to_mime_and_bytes(image)
+    return mime, base64.b64encode(raw).decode("utf-8")
+def image_to_data_url(image: str | bytes) -> str:
+    """Build a data URL (data:mime;base64,...) for the image.
+    Args:
+        image: File path or raw image bytes.
+    Returns:
+        str: Data URL string.
+    """
+    mime, b64 = image_to_base64(image)
+    return f"data:{mime};base64,{b64}"
+def mask_secret(secret: str) -> str:
+    """Mask a secret for logging (e.g. show first 6 and last 4 chars).
+    Args:
+        secret: Raw secret string.
+    Returns:
+        str: Masked string (e.g. 'abcdef...ghij' or all '*' if length <= 8).
+    """
+    if len(secret) <= 8:
+        return "*" * len(secret)
+    return f"{secret[:6]}...{secret[-4:]}"
--- a/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/vlm_adapter.py
+++ b/SenseNova-U1/src/sensenova_u1/prompt_enhance/adapters/vlm_adapter.py
+"""Abstract base class for VLM (Vision Language Model) adapters."""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+class VlmAdapter(ABC):
+    """Uniform async interface for a single Vision Language Model backend.
+    Each concrete adapter wraps one LLM endpoint + model combination and
+    exposes a single :meth:`vision_completion` coroutine. Synchronous
+    calling is intentionally **not** supported; callers must run inside an
+    asyncio event loop.
+    **Client ownership contract**: when a shared
+    :class:`httpx.AsyncClient` is supplied at construction time the adapter
+    *reuses* it and must **not** close it; the caller retains full ownership
+    of the client's lifecycle. When no external client is provided the
+    adapter creates and owns an internal client and must close it in
+    :meth:`aclose`.
+    """
+    @abstractmethod
+    async def vision_completion(
+        self,
+        user_prompt: str,
+        images: list[str | bytes],
+        system_prompt: str = "",
+        model: str | None = None,
+    ) -> str:
+        """Send a text prompt and optional image(s) to the model; return the reply.
+        Args:
+            user_prompt: User-facing text instruction.
+            images: Zero or more images to pass to the model. Each element
+                is either a file-path string or raw image bytes. An empty
+                list makes this a text-only request.
+            system_prompt: System-level instruction for the conversation.
+                Defaults to ''.
+            model: Model name to use. If None, uses the default set at
+                initialization.
+        Returns:
+            str: Raw text response from the model (may contain JSON or
+                markdown-wrapped JSON depending on the model and prompt).
+        """
+    @abstractmethod
+    async def aclose(self) -> None:
+        """Release async resources owned by this adapter.
+        Must be called when the adapter is no longer needed. Adapters that
+        were given an external shared client must implement this as a no-op;
+        adapters that created their own internal client must close it here.
+        """
--- a/SenseNova-U1/src/sensenova_u1/prompt_enhance/enhancer.py
+++ b/SenseNova-U1/src/sensenova_u1/prompt_enhance/enhancer.py
+from __future__ import annotations
+import asyncio
+import os
+from ._templates import AVAILABLE_STYLES, load_system_prompt
+from .adapters import AnthropicVlmAdapter, ChatCompletionsVlmAdapter, VlmAdapter
+# Style used when the caller does not pick one.
+DEFAULT_STYLE = "infographic"
+# Fallbacks used by make_adapter_from_env() when neither a kwarg nor an
+# environment variable supplies the value.
+DEFAULT_BACKEND = "chat_completions"
+DEFAULT_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"
+DEFAULT_MODEL = "gemini-3.1-pro"
+# Prefix shared by the BACKEND / ENDPOINT / MODEL / API_KEY environment variables.
+_ENV_PREFIX = "U1_ENHANCE_"
+# Backend names reported in the "unsupported backend" error message.
+_SUPPORTED_BACKENDS = ("chat_completions", "anthropic")
+def make_adapter_from_env(
+    *,
+    backend: str | None = None,
+    endpoint: str | None = None,
+    api_key: str | None = None,
+    model: str | None = None,
+) -> VlmAdapter:
+    """Construct a :class:`VlmAdapter` from env + explicit overrides.
+    Resolution order (highest priority first):
+    1. Explicit kwargs passed to this function.
+    2. ``U1_ENHANCE_BACKEND`` / ``U1_ENHANCE_ENDPOINT`` / ``U1_ENHANCE_API_KEY``
+       / ``U1_ENHANCE_MODEL`` environment variables.
+    3. Defaults (Gemini 3.1 Pro via its OpenAI-compatible endpoint).
+    Args:
+        backend: ``'chat_completions'`` (any OpenAI-compatible, Gemini,
+            Kimi etc.) or ``'anthropic'``.
+        endpoint: Full URL of the ``/chat/completions`` or ``/v1/messages``
+            endpoint.
+        api_key: Bearer token.
+        model: Model name string sent in the request body.
+    Raises:
+        RuntimeError: If no API key can be resolved.
+        ValueError: If ``backend`` is unsupported.
+    """
+    backend = (backend or os.environ.get(f"{_ENV_PREFIX}BACKEND") or DEFAULT_BACKEND).lower()
+    endpoint = endpoint or os.environ.get(f"{_ENV_PREFIX}ENDPOINT") or DEFAULT_ENDPOINT
+    model = model or os.environ.get(f"{_ENV_PREFIX}MODEL") or DEFAULT_MODEL
+    api_key = api_key or os.environ.get(f"{_ENV_PREFIX}API_KEY")
+    if not api_key:
+        raise RuntimeError(
+            f"Prompt enhancement requires an API key. Set {_ENV_PREFIX}API_KEY or pass api_key= explicitly."
+        )
+    if backend == "chat_completions":
+        return ChatCompletionsVlmAdapter(endpoint_url=endpoint, api_key=api_key, model=model)
+    if backend == "anthropic":
+        return AnthropicVlmAdapter(endpoint_url=endpoint, api_key=api_key, model=model)
+    raise ValueError(f"Unsupported enhance backend {backend!r}; supported: {_SUPPORTED_BACKENDS}")
+class PromptEnhancer:
+    """Thin facade that turns a :class:`VlmAdapter` into a one-shot enhancer.
+    Both entry points call the adapter with ``images=[]`` so this works with
+    any text-only chat-style LLM; vision-capable backends simply ignore the
+    empty image list.
+    The enhancer does not own the adapter's HTTP client's lifecycle –
+    call :meth:`aclose` explicitly (or rely on the sync :meth:`enhance` path
+    which spins up / tears down a fresh event loop and closes the client for you).
+    """
+    def __init__(self, adapter: VlmAdapter, *, style: str = DEFAULT_STYLE) -> None:
+        if style not in AVAILABLE_STYLES:
+            raise ValueError(f"Unknown enhance style {style!r}; supported: {AVAILABLE_STYLES}")
+        self._adapter = adapter
+        self._style = style
+        self._system_prompt = load_system_prompt(style)
+    @classmethod
+    def from_env(
+        cls,
+        *,
+        style: str = DEFAULT_STYLE,
+        backend: str | None = None,
+        endpoint: str | None = None,
+        api_key: str | None = None,
+        model: str | None = None,
+    ) -> PromptEnhancer:
+        """Convenience constructor that reads the ``U1_ENHANCE_*`` env vars."""
+        adapter = make_adapter_from_env(backend=backend, endpoint=endpoint, api_key=api_key, model=model)
+        return cls(adapter, style=style)
+    @property
+    def style(self) -> str:
+        return self._style
+    async def aenhance(self, user_prompt: str) -> str:
+        """Async entry point: expand ``user_prompt`` into a long T2I prompt."""
+        return await self._adapter.vision_completion(
+            user_prompt=user_prompt,
+            images=[],
+            system_prompt=self._system_prompt,
+        )
+    def enhance(self, user_prompt: str) -> str:
+        """Sync wrapper around :meth:`aenhance`.
+        Creates and tears down its own event loop on every call – fine for a
+        CLI that enhances a handful of prompts, but do not use inside an
+        already-running event loop (call :meth:`aenhance` directly there).
+        """
+        try:
+            asyncio.get_running_loop()
+        except RuntimeError:
+            pass
+        else:
+            raise RuntimeError(
+                "PromptEnhancer.enhance() is sync; you're already inside an asyncio loop. "
+                "Use `await enhancer.aenhance(...)` instead."
+            )
+        async def _once() -> str:
+            try:
+                return await self.aenhance(user_prompt)
+            finally:
+                await self._adapter.aclose()
+        return asyncio.run(_once())
+    async def aclose(self) -> None:
+        """Release HTTP resources owned by the underlying adapter."""
+        await self._adapter.aclose()
--- a/SenseNova-U1/src/sensenova_u1/prompt_enhance/templates/infographic_system.md
+++ b/SenseNova-U1/src/sensenova_u1/prompt_enhance/templates/infographic_system.md
+# Role
+You are a world-renowned "Senior Visual Information Architect" and "AI Image Prompt Engineering Expert." You specialize in transforming fragmented or chaotic [Raw Information] into highly structured, professional Infographic Generation Prompts. Your work is defined by rigorous visual logic, precise spatial organization, and a high density of useful information.
+# Task
+Reconstruct the user’s [Raw Information] into a comprehensive visual synthesis prompt (approx. 400-600 words). Your objective is to guide large image models (e.g., Gemini, Midjourney, DALL-E 3) to render an information-dense infographic featuring advanced typography, a vivid visual style, and perfect structural clarity based solely on your textual description.
+# Step-by-Step Methodology
+1. **Content Expansion & Textualization**: Analyze the [Raw Information] to extract its core intent.
+    - Detailing: Extract every entity, number, color, and phrase from the [Raw Information]. Do not summarize.
+    - Categorization: Define sub-categories with distinct visual markers.
+    - Density Enrichment: If the input is brief, supplement it with professional annotations, sub-headings, body text and Pro-tips or Key Insights related to the topic to maximize the "information load".
+2. **Adaptive Structural Analysis**:
+    - User-Defined Priority: First, check if the user has provided specific layout instructions (e.g., "three-column grid," "horizontal timeline"). If present, strictly follow these instructions.
+    - Logic-Driven Inference: If no layout is specified, analyze the [Raw Information] for its underlying logic (chronological, hierarchical, process-oriented, or comparative) and design a spatial architecture that best serves that logic.
+3. **Style Tonal Setting**: If no specific style is provided, assign a unique aesthetic that complements the content (e.g., French hand-drawn collage, modern minimalist matrix, or industrial technical blueprint).
+4. **Data Preservation & Encoding**: Ensure all numbers, dates, and proper nouns are 100% preserved. Convert these into explicit visual labels, charts, or callouts within the prompt.
+# Strict Constraints
+1. **Strict Language Parity**: Maintain absolute language consistency. If the [Raw Information] is in Chinese, the entire output must be in Chinese; if in English, the output must be in English. No code-switching.
+2. **Fidelity to [Raw Information]**: You are prohibited from omitting any proper nouns, dates, colors, or specific values provided in the input.
+3. **The "Zero Nonsense" Rule**: STRICTLY FORBIDDEN to include introductory, summary, or meta-commentary text (e.g., "Here is the refined prompt..."). Do not explain design choices or justify element omissions (e.g., do not mention "implied flow"). Start the response immediately with the visual description.
+4. **Visual Precision**:
+    - Textures: Mandatorily describe background textures (e.g., off-white aged paper, light gray grid, or black halftone shadows).
+    - Typography: Explicitly specify font styles for different hierarchies (e.g., bold serif for titles, condensed mono-space for technical data).
+5. **Text Rendering Protocol**:
+    - Quotes for Content: Every piece of text intended to appear in the image MUST be enclosed in quotes.
+    - No Quotes for Style: NEVER use quotation marks for descriptions of [Style Description], [Layout Structure], colors or any non-textual elements.
+6. **Relational Arrow Logic**: Minimize the use of arrows. Rely on spatial proximity or alignment to imply connectivity. If arrows are requested, avoid generic orientations like "horizontal." Instead, specify their precise starting point and target destination.
+7. **Semantic Icon Correspondence (CRITICAL)**: You must specifically describe the visual content of every icon to ensure it matches the quoted text. (e.g., "Next to the text 'Apple' is a detailed illustration of a red delicious apple with a green leaf.") Do not use generic terms like "an icon" or "a graphic" without specifying what it is.
+8. **No Hexadecimal Codes**: Never use codes like #xxxx. Use descriptive color names (e.g., sage green, deep navy blue, terracotta).
+# Output Format (Reference Structure)
+The title of this infographic is "[Subject Name]", and it adopts a [Style Description]. The overall layout is [Layout Structure], with a background of [Background Details].
+Describe the professional Infographic Generation Prompts using natural language, including [Description of elements or icons in the image], [Position], and embed the text information within it, enclosed in quotes.
+---
+Please receive the user's [Raw Information] and directly output the restructured professional image generation prompt:
\ No newline at end of file
--- a/SenseNova-U1/src/sensenova_u1/utils/__init__.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/__init__.py
+from .accel import (
+    best_available_device,
+)
+from .accel import (
+    manual_seed_all as seed_all_accelerators,
+)
+from .checkpoint_loading import (
+    add_offload_args,
+    infer_input_device,
+    load_model_and_tokenizer,
+    parse_max_memory,
+)
+from .comparison import save_compare
+from .gguf_loader import load_gguf_checkpoint, match_state_dict, set_gguf2meta_model
+from .lora import load_and_merge_lora_weight_from_safetensors
+from .offload import (
+    DEFAULT_LAYERS_ATTR,
+    DEFAULT_VRAM_MODE,
+    VRAM_MODE_OPTIONS,
+    make_offload_ctx,
+    offload_layers_async,
+    offload_layers_sync,
+    vram_mode_to_prefetch_count,
+)
+from .param_count import (
+    ModelParamInspector,
+    build_rules,
+    format_bytes,
+    format_param_count,
+)
+from .profiler import DEFAULT_IMAGE_PATCH_SIZE, InferenceProfiler
+# Public API of the ``utils`` package. Every name listed here is imported
+# above; ``seed_all_accelerators`` is the package-level alias of
+# ``accel.manual_seed_all``.
+__all__ = [
+    "DEFAULT_IMAGE_PATCH_SIZE",
+    "DEFAULT_LAYERS_ATTR",
+    "DEFAULT_VRAM_MODE",
+    "InferenceProfiler",
+    "ModelParamInspector",
+    "VRAM_MODE_OPTIONS",
+    "add_offload_args",
+    "best_available_device",
+    "build_rules",
+    "format_bytes",
+    "format_param_count",
+    "infer_input_device",
+    "load_and_merge_lora_weight_from_safetensors",
+    "load_gguf_checkpoint",
+    "load_model_and_tokenizer",
+    "make_offload_ctx",
+    "match_state_dict",
+    "offload_layers_async",
+    "offload_layers_sync",
+    "parse_max_memory",
+    "save_compare",
+    "seed_all_accelerators",
+    "set_gguf2meta_model",
+    "vram_mode_to_prefetch_count",
+]
--- a/SenseNova-U1/src/sensenova_u1/utils/accel.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/accel.py
+"""Accelerator-agnostic helpers for CUDA / XPU.
+The codebase used to hard-code ``torch.cuda.*`` for device-availability checks,
+cache management, and stream APIs. This module centralises the small amount of
+namespace switching needed so the same code paths work on both CUDA (incl.
+ROCm via the ``cuda`` namespace) and Intel XPU.
+CPU / MPS are intentionally out of scope: the layer-offload / inference path
+needs pinned host memory and dedicated transfer streams, which neither of
+those backends exposes uniformly. Targets PyTorch >= 2.5 where ``torch.xpu``
+is in-tree.
+"""
+from __future__ import annotations
+import torch
+# ``torch.device.type`` values accepted by the helpers in this module.
+SUPPORTED_DEVICE_TYPES: tuple[str, ...] = ("cuda", "xpu")
+def accel_module(device: torch.device):
+    """Map ``device.type`` to the matching torch accelerator namespace.
+    Returns ``torch.cuda`` for CUDA devices and ``torch.xpu`` for XPU devices.
+    Raises:
+        NotImplementedError: for any other device type.
+    """
+    kind = device.type
+    if kind not in ("cuda", "xpu"):
+        raise NotImplementedError(
+            f"No accelerator namespace for device {device!r}; supported: {SUPPORTED_DEVICE_TYPES}."
+        )
+    # The namespace attribute is looked up only after the check, so
+    # ``torch.xpu`` is touched for XPU devices only.
+    return torch.cuda if kind == "cuda" else torch.xpu
+def require_accelerator(device: torch.device) -> None:
+    """Validate that ``device`` belongs to a backend listed in ``SUPPORTED_DEVICE_TYPES``.
+    Returns silently for supported devices.
+    Raises:
+        NotImplementedError: when ``device.type`` is not a supported type.
+    """
+    if device.type in SUPPORTED_DEVICE_TYPES:
+        return
+    raise NotImplementedError(
+        f"Inference requires a CUDA or XPU device (got {device!r}). "
+        "CPU / MPS lack the pinned-memory and stream primitives used here."
+    )
+def is_available(device_type: str) -> bool:
+    """Report whether the named backend has a usable device.
+    Only ``"cuda"`` and ``"xpu"`` are queried; every other name yields ``False``.
+    """
+    if device_type == "xpu":
+        return torch.xpu.is_available()
+    # Short-circuits to False for anything that is not "cuda".
+    return device_type == "cuda" and torch.cuda.is_available()
+def best_available_device() -> torch.device:
+    """Return the preferred device: CUDA if available, else XPU, else CPU."""
+    if torch.cuda.is_available():
+        chosen = "cuda"
+    elif torch.xpu.is_available():
+        chosen = "xpu"
+    else:
+        chosen = "cpu"
+    return torch.device(chosen)
+def empty_cache(device: torch.device | None = None) -> None:
+    """Release cached allocator blocks held by the accelerator backend.
+    Args:
+        device: When given, only that device's backend is emptied, and only
+            if its type is supported and reports an available device. When
+            ``None``, every available backend (CUDA, then XPU) is emptied —
+            the teardown case where the backend that was used is unknown.
+    """
+    if device is not None:
+        if device.type in SUPPORTED_DEVICE_TYPES and is_available(device.type):
+            accel_module(device).empty_cache()
+        return
+    for backend_name in ("cuda", "xpu"):
+        backend = getattr(torch, backend_name)
+        if backend.is_available():
+            backend.empty_cache()
+def synchronize(device: torch.device | None = None) -> None:
+    """Wait for outstanding accelerator work to finish.
+    Args:
+        device: When given, only that device is synchronized, and only if its
+            type is supported and reports an available device. When ``None``,
+            every available backend (CUDA, then XPU) is synchronized.
+    """
+    if device is not None:
+        if device.type in SUPPORTED_DEVICE_TYPES and is_available(device.type):
+            accel_module(device).synchronize(device)
+        return
+    for backend_name in ("cuda", "xpu"):
+        backend = getattr(torch, backend_name)
+        if backend.is_available():
+            backend.synchronize()
+def manual_seed_all(seed: int) -> None:
+    """Apply ``seed`` to every device of each available backend (CUDA, then XPU)."""
+    for backend_name in ("cuda", "xpu"):
+        backend = getattr(torch, backend_name)
+        if backend.is_available():
+            backend.manual_seed_all(seed)
--- a/SenseNova-U1/src/sensenova_u1/utils/checkpoint_loading.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/checkpoint_loading.py
+"""Shared model + tokenizer loader for SenseNova-U1.
+Centralises the ``AutoConfig`` / ``AutoTokenizer`` / ``AutoModel`` calls used
+by the example scripts and the ComfyUI app, and adds an optional GGUF
+checkpoint override.
+Usage:
+    from sensenova_u1.utils import load_model_and_tokenizer
+    model, tokenizer = load_model_and_tokenizer(
+        model_path="sensenova/SenseNova-U1-8B-MoT",
+        dtype=torch.bfloat16,
+        # device=None auto-picks CUDA > XPU > CPU. Pass an explicit
+        # "cuda" / "cuda:0" / "xpu" / "xpu:0" to override.
+    )
+    # GGUF override (config / tokenizer still come from `model_path`):
+    model, tokenizer = load_model_and_tokenizer(
+        model_path="sensenova/SenseNova-U1-8B-MoT",
+        dtype=torch.bfloat16,
+        gguf_checkpoint="/path/to/SenseNova-U1-8B-MoT-Q5_K_M.gguf",
+    )
+"""
+from __future__ import annotations
+import argparse
+import gc
+import json
+import logging
+from pathlib import Path
+from typing import Any
+import torch
+from torch import nn
+from . import accel
+LOGGER = logging.getLogger(__name__)
+def _default_device() -> torch.device:
+    """Return the loaders' default device, delegating to ``accel.best_available_device``."""
+    device = accel.best_available_device()
+    return device
+def add_offload_args(parser: argparse.ArgumentParser) -> None:
+    """Register ``--device_map``, ``--max_memory`` and ``--vram_mode`` on ``parser``.
+    The choices and default for ``--vram_mode`` come from the ``offload``
+    module, imported lazily inside the function.
+    """
+    from .offload import DEFAULT_VRAM_MODE, VRAM_MODE_OPTIONS
+    device_map_help = (
+        "Optional Transformers device_map, e.g. 'auto', 'balanced', "
+        "'balanced_low_0', or 'sequential'. When set, the model is loaded "
+        "with Accelerate dispatch and is not moved again with .to(device). "
+        "Use this for multi-GPU split; for low-VRAM single-card, prefer --vram_mode."
+    )
+    max_memory_help = (
+        "Optional per-device memory limits for --device_map, either JSON "
+        "or comma-separated KEY=VALUE pairs, e.g. '0=20GiB,1=20GiB'."
+    )
+    vram_mode_help = (
+        "Single-GPU layer-offload mode. "
+        "'full' = no offload, whole model on GPU, fastest (default). "
+        "'low' = synchronous per-layer CPU<->GPU swap, smallest weight footprint. "
+        "'balanced' = async prefetch, overlaps H2D with compute, faster than 'low'. "
+        "Mutually exclusive with --device_map (layer offload requires the model on CPU)."
+    )
+    parser.add_argument("--device_map", default=None, help=device_map_help)
+    parser.add_argument("--max_memory", default=None, help=max_memory_help)
+    parser.add_argument(
+        "--vram_mode",
+        choices=list(VRAM_MODE_OPTIONS),
+        default=DEFAULT_VRAM_MODE,
+        help=vram_mode_help,
+    )
+def infer_input_device(model: nn.Module, fallback: str | torch.device | None = None) -> torch.device:
+    """Choose the device on which inputs for ``model`` should be created.
+    The device of the first parameter that is neither on ``cpu`` nor on
+    ``meta`` wins. If no such parameter exists, ``fallback`` is used (strings
+    are converted to ``torch.device``); with ``fallback=None`` the module's
+    default device (CUDA > XPU > CPU) is returned.
+    """
+    placed = next(
+        (p.device for p in model.parameters() if p.device.type not in {"cpu", "meta"}),
+        None,
+    )
+    if placed is not None:
+        return placed
+    if fallback is None:
+        return _default_device()
+    if isinstance(fallback, str):
+        return torch.device(fallback)
+    return fallback
+def _resolve_local_model_path(model_path: str) -> str:
+    """Translate a Hugging Face id into its locally cached snapshot directory.
+    An existing filesystem path is returned untouched. Otherwise the hub cache
+    is queried with ``local_files_only=True`` (no network access); if that
+    lookup fails for any reason the input is returned unchanged so the caller
+    can fall back to its normal resolution.
+    """
+    if not Path(model_path).exists():
+        try:
+            from huggingface_hub import snapshot_download
+            return snapshot_download(model_path, local_files_only=True)
+        except Exception:
+            # Best effort only: a missing package or cache entry falls through.
+            pass
+    return model_path
+def load_model_and_tokenizer(
+    model_path: str,
+    *,
+    dtype: torch.dtype,
+    device: str | torch.device | None = None,
+    gguf_checkpoint: str | None = None,
+    device_map: str | None = None,
+    max_memory: str | dict[int | str, str] | None = None,
+    for_offload: bool = False,
+) -> tuple[nn.Module, Any]:
+    """Build a SenseNova-U1 model + tokenizer pair.
+    ``model_path`` always provides the config and tokenizer (HF id or local
+    directory containing ``config.json``).
+    Weight loading branches on ``gguf_checkpoint``:
+    - ``None``: standard ``AutoModel.from_pretrained(model_path, ...)``.
+      The ``device_map`` / ``max_memory`` accelerate kwargs apply on this
+      path; when ``device_map`` is ``None`` the model is ``.to(device)``
+      after loading.
+    - ``"*.gguf"``: build a meta-init model from the config and inject
+      dequantizing weights from the GGUF file via the diffusers quantizer.
+      The accelerate kwargs are ignored on this path (with a warning when
+      ``device_map`` was supplied), so ``device`` is resolved exactly as if
+      no ``device_map`` had been passed.
+    When ``for_offload=True`` the loaded model stays on CPU (no ``.to(device)``)
+    so a downstream layer-offload wrapper can manage CPU<->GPU movement
+    itself. ``device_map`` is forced to ``None`` in this mode (with a warning)
+    because accelerate's static placement is incompatible with dynamic offload.
+    Args:
+        model_path: HF id or local directory for config, tokenizer and
+            (non-GGUF) weights.
+        dtype: Torch dtype used for the loaded weights.
+        device: Target device; ``None`` auto-picks CUDA > XPU > CPU unless
+            ``device_map`` or ``for_offload`` is in effect.
+        gguf_checkpoint: Optional path to a ``.gguf`` weight file.
+        device_map: Optional Transformers/Accelerate device map.
+        max_memory: Optional per-device limits, see ``_normalize_max_memory``.
+        for_offload: Keep the model on CPU for a layer-offload wrapper.
+    Returns:
+        ``(model, tokenizer)`` with the model in eval mode.
+    """
+    from transformers import AutoConfig, AutoModel, AutoTokenizer
+    from .. import check_checkpoint_compatibility
+    if for_offload and device_map:
+        LOGGER.warning(
+            "for_offload=True overrides device_map=%r (accelerate placement is incompatible with layer offload).",
+            device_map,
+        )
+        device_map = None
+    if gguf_checkpoint is not None and device_map:
+        # The GGUF path never passes device_map to accelerate. Leaving it set
+        # would skip the default-device resolution below and load the GGUF
+        # model with device=None.
+        LOGGER.warning(
+            "device_map=%r is ignored when loading from a GGUF checkpoint.",
+            device_map,
+        )
+        device_map = None
+    if device is None and not device_map and not for_offload:
+        device = _default_device()
+    model_path = _resolve_local_model_path(model_path)
+    config = AutoConfig.from_pretrained(model_path)
+    check_checkpoint_compatibility(config)
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    if gguf_checkpoint is not None:
+        # Offload mode keeps the weights on CPU; otherwise inject directly on `device`.
+        gguf_device = torch.device("cpu") if for_offload else device
+        model = _load_from_gguf(config, gguf_checkpoint, dtype=dtype, device=gguf_device)
+    else:
+        model_kwargs: dict[str, Any] = {"config": config, "torch_dtype": dtype}
+        if device_map:
+            model_kwargs["device_map"] = device_map
+            parsed_max_memory = _normalize_max_memory(max_memory)
+            if parsed_max_memory:
+                model_kwargs["max_memory"] = parsed_max_memory
+        model = AutoModel.from_pretrained(model_path, **model_kwargs).eval()
+        # A dispatched (device_map) or offload-managed model must not be moved again.
+        if not device_map and device is not None and not for_offload:
+            model = model.to(device)
+    return model, tokenizer
+def _normalize_max_memory(value: str | dict | None) -> dict[int | str, str]:
+    """Turn a ``max_memory`` specification into a ``{device: limit}`` mapping.
+    Accepted inputs:
+    - ``None`` or ``""``: returns an empty dict.
+    - a mapping: keys are coerced with ``_coerce_memory_key``, values stringified.
+    - a string starting with ``{``: parsed as a JSON object, then treated like a mapping.
+    - any other string: comma-separated ``KEY=VALUE`` pairs such as
+      ``"0=20GiB,cpu=64GiB"``; empty items are skipped.
+    Raises:
+        RuntimeError: if a pair lacks ``=``, has an empty key or value, or the
+            JSON payload is not an object.
+    """
+    if value is None or value == "":
+        return {}
+    if isinstance(value, dict):
+        return {_coerce_memory_key(k): str(v) for k, v in value.items()}
+    text = value.strip()
+    if text.startswith("{"):
+        decoded = json.loads(text)
+        if not isinstance(decoded, dict):
+            raise RuntimeError("max_memory JSON must be an object")
+        return {_coerce_memory_key(k): str(v) for k, v in decoded.items()}
+    limits: dict[int | str, str] = {}
+    for entry in filter(None, (chunk.strip() for chunk in text.split(","))):
+        device_key, separator, amount = entry.partition("=")
+        if not separator:
+            raise RuntimeError("max_memory entries must look like 0=20GiB,cpu=64GiB.")
+        device_key = device_key.strip()
+        amount = amount.strip()
+        if not (device_key and amount):
+            raise RuntimeError("max_memory entries must include both device and memory.")
+        limits[_coerce_memory_key(device_key)] = amount
+    return limits
+def _coerce_memory_key(key: object) -> int | str:
+    """Normalize a ``max_memory`` device key.
+    Integers are returned unchanged. Any other value is stringified; strings
+    made up solely of decimal digits become ``int`` (a device index), and
+    everything else (e.g. ``"cpu"``) is returned as a string.
+    """
+    if isinstance(key, int):
+        return key
+    key_str = str(key)
+    # ``str.isdigit`` also accepts characters such as "²" that ``int()``
+    # rejects with ValueError; ``isdecimal`` only accepts strings ``int()`` can parse.
+    return int(key_str) if key_str.isdecimal() else key_str
+# Public alias of ``_normalize_max_memory`` (same function object).
+parse_max_memory = _normalize_max_memory
+def _load_from_gguf(
+    config,
+    gguf_checkpoint: str,
+    *,
+    dtype: torch.dtype,
+    device: str | torch.device | None,
+) -> nn.Module:
+    """Instantiate the model on the meta device and fill it from a GGUF file.
+    Steps: build an empty-weight model from ``config``, parse the GGUF file
+    into a state dict, inject it with ``set_gguf2meta_model`` on ``device``,
+    report how many ``GGUFLinear`` modules resulted, then release the parsed
+    state dict and the accelerator cache.
+    Returns:
+        The populated model in eval mode.
+    Raises:
+        RuntimeError: if ``accelerate`` is not installed.
+    """
+    try:
+        from accelerate import init_empty_weights
+    except ImportError as exc:
+        raise RuntimeError("GGUF loading requires `accelerate`; install it in your environment.") from exc
+    from transformers import AutoModel
+    from .gguf_loader import load_gguf_checkpoint, set_gguf2meta_model
+    print(f"[gguf] loading quantized checkpoint from {gguf_checkpoint}")
+    with init_empty_weights():
+        model = AutoModel.from_config(config)
+    state_dict = load_gguf_checkpoint(gguf_checkpoint)
+    print(f"[gguf] parsed {len(state_dict)} tensors")
+    if isinstance(device, str):
+        target_device = torch.device(device)
+    else:
+        target_device = device
+    # Weights are placed on `target_device` during injection; callers that
+    # want a different final device can `.to()` the result afterwards.
+    set_gguf2meta_model(model, state_dict, dtype, target_device)
+    # Matched by class name so the GGUFLinear class need not be imported here.
+    gguf_linear_count = len([m for m in model.modules() if type(m).__name__ == "GGUFLinear"])
+    print(f"[gguf] {gguf_linear_count} GGUFLinear modules active (dequantized at forward time)")
+    if not gguf_linear_count:
+        print("[gguf] WARNING: no GGUFLinear modules found — quantizer hook did not run as expected")
+    del state_dict
+    gc.collect()
+    accel.empty_cache()
+    return model.eval()
--- a/SenseNova-U1/src/sensenova_u1/utils/comparison.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/comparison.py
+from __future__ import annotations
+import re
+from pathlib import Path
+from typing import Sequence
+from PIL import Image, ImageDraw, ImageFont
+__all__ = ["make_comparison", "save_compare"]
+# Tokens for pixel-aware wrap: a whitespace run, a single character in
+# U+4E00-U+9FFF, or a run of any other non-whitespace characters.
+_WRAP_TOKEN_RE = re.compile(r"\s+|[\u4e00-\u9fff]|[^\s\u4e00-\u9fff]+")
+# Detects whether a string contains at least one character in U+4E00-U+9FFF.
+_CJK_RE = re.compile(r"[\u4e00-\u9fff]")
+# Font search order: CJK-capable first so Chinese prompts render properly.
+# Paths are tried in order; the first one that loads is used.
+_CJK_FONTS = (
+    "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+    "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+    "/usr/share/fonts/google-noto-cjk-vf-fonts/NotoSansCJK-VF.otf.ttc",
+    "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
+    "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
+    "./fonts/Noto_Sans_SC/static/NotoSansSC-Regular.ttf",
+)
+# Tried only when none of the CJK fonts above could be loaded.
+_LATIN_FONTS = (
+    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+    "/usr/share/fonts/dejavu/DejaVuSans.ttf",
+    "DejaVuSans.ttf",
+)
+# Set to True by make_comparison after the missing-CJK-font warning has been
+# printed, so the warning appears at most once per process.
+_warned_missing_cjk = False
+def _load_font(size: int) -> tuple[ImageFont.ImageFont | ImageFont.FreeTypeFont, bool]:
+    """Load the first usable font at ``size`` and report whether it covers CJK.
+    The CJK candidates are tried before the Latin ones. If no TrueType font
+    can be opened, PIL's built-in default is returned (sized when the
+    installed Pillow accepts ``size``) with CJK coverage reported as ``False``.
+    """
+    for candidates, covers_cjk in ((_CJK_FONTS, True), (_LATIN_FONTS, False)):
+        for font_path in candidates:
+            try:
+                font = ImageFont.truetype(font_path, size=size)
+            except OSError:
+                continue
+            return font, covers_cjk
+    try:
+        fallback = ImageFont.load_default(size=size)
+    except TypeError:
+        # This Pillow's load_default() does not accept a size argument.
+        fallback = ImageFont.load_default()
+    return fallback, False
+def _wrap_text(text: str, font, max_width: int) -> list[str]:
+    """Greedily wrap ``text`` so each line measures at most ``max_width`` pixels.
+    Explicit newlines start a new paragraph. Within a paragraph, tokens from
+    ``_WRAP_TOKEN_RE`` are appended while the right-stripped line still fits
+    according to ``font.getlength``. A token that does not fit starts the next
+    line (whitespace tokens are dropped at a break). Paragraphs that produce
+    no visible text add no line; the result is never empty (``[""]`` at minimum).
+    """
+    wrapped: list[str] = []
+    for paragraph in text.split("\n"):
+        pending = ""
+        for token in _WRAP_TOKEN_RE.findall(paragraph):
+            extended = pending + token
+            if font.getlength(extended.rstrip()) > max_width:
+                if pending.strip():
+                    wrapped.append(pending.rstrip())
+                pending = token if not token.isspace() else ""
+            else:
+                pending = extended
+        tail = pending.rstrip()
+        if tail:
+            wrapped.append(tail)
+    return wrapped if wrapped else [""]
+def make_comparison(
+    inputs: Sequence[Image.Image],
+    output: Image.Image,
+    prompt: str,
+    *,
+    pad: int = 16,
+    bg: tuple[int, int, int] = (255, 255, 255),
+) -> Image.Image:
+    """Compose ``[inputs... | output]`` side by side with ``prompt`` written underneath.
+    Each input whose height differs from the output's is rescaled (aspect
+    ratio preserved, LANCZOS) to the output's height so the row lines up.
+    Images are separated and surrounded by ``pad`` pixels on a ``bg``
+    background; the prompt is wrapped to the row width and drawn in black.
+    A one-time warning is printed if the prompt contains CJK characters but
+    no CJK-capable font was found.
+    """
+    global _warned_missing_cjk
+    target_h = output.size[1]
+    tiles: list[Image.Image] = []
+    for tile in inputs:
+        src_w, src_h = tile.size
+        if src_h != target_h:
+            scaled_w = max(1, round(src_w * target_h / src_h))
+            tile = tile.resize((scaled_w, target_h), Image.LANCZOS)
+        tiles.append(tile)
+    tiles.append(output)
+    canvas_w = sum(tile.size[0] for tile in tiles) + pad * (len(tiles) + 1)
+    font, has_cjk = _load_font(max(18, target_h // 30))
+    if not has_cjk and not _warned_missing_cjk and _CJK_RE.search(prompt):
+        print(
+            "[compare] prompt contains CJK but no CJK-capable font was found; "
+            "Chinese characters will render as tofu. Install e.g. `fonts-noto-cjk` "
+            "(Debian/Ubuntu) or `google-noto-cjk-fonts` (RHEL-family) for proper rendering."
+        )
+        _warned_missing_cjk = True
+    lines = _wrap_text(prompt, font, canvas_w - pad * 2)
+    # Line height is derived from a sample containing an ascender, a descender and a CJK glyph.
+    sample_box = font.getbbox("Ag中")
+    line_h = max(1, int((sample_box[3] - sample_box[1]) * 1.3))
+    text_h = line_h * len(lines) + pad * 2
+    canvas = Image.new("RGB", (canvas_w, target_h + pad * 2 + text_h), bg)
+    left = pad
+    for tile in tiles:
+        canvas.paste(tile, (left, pad))
+        left += tile.size[0] + pad
+    draw = ImageDraw.Draw(canvas)
+    text_top = target_h + pad * 2
+    for row, line in enumerate(lines):
+        draw.text((pad, text_top + row * line_h), line, fill=(0, 0, 0), font=font)
+    return canvas
+def save_compare(
+    out_path: Path,
+    inputs: Sequence[Image.Image],
+    output: Image.Image,
+    prompt: str,
+) -> None:
+    """Write the comparison image beside ``out_path`` as ``<stem>_compare<suffix>``.
+    The image is built by ``make_comparison`` and the saved path is printed.
+    """
+    target = out_path.with_name(out_path.stem + "_compare" + out_path.suffix)
+    comparison = make_comparison(inputs, output, prompt)
+    comparison.save(target)
+    print(f"[saved] {target}")
--- a/SenseNova-U1/src/sensenova_u1/utils/gguf_loader.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/gguf_loader.py
+"""GGUF checkpoint loader for transformers/diffusers-style models.
+Public API:
+    load_gguf_checkpoint(path) -> dict[str, Tensor | GGUFParameter]
+    set_gguf2meta_model(meta_model, state_dict, dtype, device) -> nn.Module
+    match_state_dict(meta_model, state_dict, show_num=10) -> dict  # debug helper
+"""
+from __future__ import annotations
+import gc
+import torch
+from torch import nn
+def load_gguf_checkpoint(gguf_checkpoint_path: str) -> dict:
+    """Read a ``.gguf`` file and return its tensors keyed by tensor name.
+    Tensors stored as F32 / F16 are returned as plain torch tensors. Every
+    other (quantized) tensor is wrapped in ``GGUFParameter`` carrying its
+    quant type, so the diffusers quantizer can dequantize it later.
+    Raises:
+        ImportError: if ``gguf`` or ``torch`` is reported unavailable.
+        ValueError: if a tensor uses a quant type diffusers does not support.
+    """
+    from diffusers.utils import is_gguf_available, is_torch_available
+    if not is_gguf_available() or not is_torch_available():
+        raise ImportError("Please install torch and gguf>=0.10.0 to load a GGUF checkpoint.")
+    import gguf
+    from diffusers.quantizers.gguf.utils import SUPPORTED_GGUF_QUANT_TYPES, GGUFParameter
+    from gguf import GGUFReader
+    unquantized_types = (gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16)
+    reader = GGUFReader(gguf_checkpoint_path)
+    state: dict = {}
+    for entry in reader.tensors:
+        tensor_name = entry.name
+        qtype = entry.tensor_type
+        quantized = qtype not in unquantized_types
+        if quantized and qtype not in SUPPORTED_GGUF_QUANT_TYPES:
+            listing = "\n".join(str(t) for t in SUPPORTED_GGUF_QUANT_TYPES)
+            raise ValueError(f"{tensor_name} has unsupported quant type {qtype}.\nSupported:\n{listing}")
+        # Copy so the resulting tensor does not alias the reader's buffer.
+        payload = torch.from_numpy(entry.data.copy())
+        if quantized:
+            state[tensor_name] = GGUFParameter(payload, quant_type=qtype)
+        else:
+            state[tensor_name] = payload
+        # Drop loop-local references before moving to the next tensor.
+        del entry, payload
+    del reader
+    gc.collect()
+    return state
+def set_gguf2meta_model(
+    meta_model: nn.Module,
+    model_state_dict: dict,
+    dtype: torch.dtype,
+    device: torch.device | None,
+) -> nn.Module:
+    """Inject GGUF weights into a meta-initialized model.
+    The model **must** have been built with ``accelerate.init_empty_weights()``
+    so its parameters live on the meta device. This function:
+      1. Replaces ``nn.Linear`` modules with ``GGUFLinear`` (via the quantizer hook).
+      2. Loads the parsed state-dict into those modules.
+      3. Returns the model cast to ``dtype`` (non-quant params only).
+    Args:
+        meta_model: Model to populate; it is modified in place.
+        model_state_dict: Parsed GGUF tensors keyed by parameter name.
+        dtype: Compute dtype for the quantizer and target dtype of the final
+            ``.to(dtype=...)`` call.
+        device: Placement passed to the loader as ``{"": device}``; ``None``
+            means no device map is passed.
+    Returns:
+        The result of ``meta_model.to(dtype=dtype)``.
+    """
+    from diffusers import GGUFQuantizationConfig
+    from diffusers.models.model_loading_utils import load_model_dict_into_meta
+    from diffusers.quantizers.gguf import GGUFQuantizer
+    # Falls back to bfloat16 as compute dtype when `dtype` is falsy.
+    g_config = GGUFQuantizationConfig(compute_dtype=dtype or torch.bfloat16)
+    hf_quantizer = GGUFQuantizer(quantization_config=g_config)
+    hf_quantizer.pre_quantized = True  # required: weights are already quantized
+    # The empty-string key applies the placement to the whole model.
+    device_map = {"": device} if device is not None else None
+    # Order matters: the pre-load hook must run before the weights are loaded,
+    # and the post-load hook after.
+    hf_quantizer._process_model_before_weight_loading(meta_model, device_map=device_map, state_dict=model_state_dict)
+    load_model_dict_into_meta(
+        meta_model,
+        model_state_dict,
+        hf_quantizer=hf_quantizer,
+        device_map=device_map,
+        dtype=dtype,
+    )
+    hf_quantizer._process_model_after_weight_loading(meta_model)
+    # Only drops this function's reference; the caller's dict is unaffected.
+    del model_state_dict
+    gc.collect()
+    return meta_model.to(dtype=dtype)
+def match_state_dict(meta_model: nn.Module, sd: dict, show_num: int = 10) -> dict:
+    """Debug helper: report how well a parsed state-dict matches a model.
+    Prints the number of matching keys, up to ``show_num`` extra / missing
+    keys, and up to five matching keys. Keys are printed in sorted order so
+    the output is the same on every run.
+    Args:
+        meta_model: Model whose ``state_dict()`` keys are the reference.
+        sd: Parsed state-dict to compare against the model.
+        show_num: Maximum number of extra / missing keys to print.
+    Returns:
+        ``{"matching": <count>, "extra": <set>, "missing": <set>}`` where
+        ``extra`` are keys only in ``sd`` and ``missing`` are keys only in
+        the model.
+    """
+    model_keys = set(meta_model.state_dict().keys())
+    sd_keys = set(sd.keys())
+    matching = model_keys & sd_keys
+    extra = sd_keys - model_keys
+    missing = model_keys - sd_keys
+    print(f"[gguf] matching keys: {len(matching)}")
+    # Sets iterate in arbitrary order; sort (by string form) before slicing
+    # so the same keys are shown on every run.
+    if extra:
+        print(f"[gguf] extra in state_dict (not in model): {len(extra)}")
+        for k in sorted(extra, key=str)[:show_num]:
+            print(f"  + {k}")
+    if missing:
+        print(f"[gguf] missing in state_dict (in model only): {len(missing)}")
+        for k in sorted(missing, key=str)[:show_num]:
+            print(f"  - {k}")
+    print(f"[gguf] sample matches: {sorted(matching, key=str)[:5]}")
+    return {
+        "matching": len(matching),
+        "extra": extra,
+        "missing": missing,
+    }
--- a/SenseNova-U1/src/sensenova_u1/utils/layer_offload.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/layer_offload.py
+"""Layer offload wrapper for memory-efficient inference.
+Keeps each layer of an ``nn.ModuleList`` in CPU pinned memory and moves it
+onto an accelerator device (CUDA or XPU) on demand. Two modes share a single
+:class:`LayerOffloadWrapper`:
+- ``prefetch_count == 0`` — synchronous: load before forward, evict after.
+- ``prefetch_count >= 1`` — asynchronous: a dedicated CUDA stream prefetches
+  the next ``prefetch_count`` layers so the H2D copy overlaps compute.
+General-purpose: works with any ``nn.Module`` whose forward iterates over a
+``nn.ModuleList`` attribute (``transformer_blocks``, ``layers``, …). Each
+layer is evicted back to CPU immediately after its forward completes; in
+async mode prefetch wraps around modulo the layer count so the last layer's
+prefetch warms up early layers for the next forward pass.
+Inference-only — the eviction-after-forward design destroys gradient flow,
+so :meth:`__init__` rejects models in training mode.
+Origin: adapted from `Lightricks/LTX-2 <https://github.com/Lightricks/LTX-2>`_.
+Example
+-------
+>>> model = build_my_model(device=torch.device("cpu")).eval()
+>>> model = LayerOffloadWrapper(
+...     model,
+...     layers_attr="transformer_blocks",
+...     target_device=torch.device("cuda:0"),
+...     prefetch_count=2,
+... )
+>>> out = model(inputs)
+>>> model.teardown()
+"""
+from __future__ import annotations
+import functools
+import itertools
+import logging
+from typing import Any
+import torch
+from torch import nn
+from .accel import accel_module as _accel
+from .accel import require_accelerator as _require_accelerator
+logger = logging.getLogger(__name__)
+def _log_vram(label: str, target_device: torch.device, *, reset_peak: bool = False) -> None:
+    """Log allocated / reserved / peak memory of ``target_device`` in GiB.
+    Intended for diagnosing offload-mode leaks across repeated runs (notably
+    under ComfyUI). The snapshot is logged at INFO; any failure is swallowed
+    and reported at DEBUG, so the helper never raises from the try block.
+    Args:
+        label: Free-form tag printed in front of the numbers.
+        target_device: Device whose memory statistics are read.
+        reset_peak: When true, the peak statistic is reset after logging.
+    """
+    gib = 1024**3
+    try:
+        backend = _accel(target_device)
+        if not backend.is_available():
+            return
+        allocated_gib = backend.memory_allocated(target_device) / gib
+        reserved_gib = backend.memory_reserved(target_device) / gib
+        peak_gib = backend.max_memory_allocated(target_device) / gib
+        logger.info(
+            "[layer_offload vram] %-40s | alloc=%6.2f GiB  reserved=%6.2f GiB  peak=%6.2f GiB",
+            label,
+            allocated_gib,
+            reserved_gib,
+            peak_gib,
+        )
+        if reset_peak:
+            backend.reset_peak_memory_stats(target_device)
+    except Exception as exc:  # pragma: no cover - diagnostic only
+        logger.debug("vram log %r failed: %s", label, exc)
+def _resolve_attr(module: nn.Module, dotted_path: str) -> nn.ModuleList:
+    """Follow ``dotted_path`` (e.g. ``'model.language_model.layers'``) from ``module``.
+    Raises:
+        AttributeError: if any path component does not exist.
+        TypeError: if the resolved object is not an ``nn.ModuleList``.
+    """
+    resolved: Any = functools.reduce(getattr, dotted_path.split("."), module)
+    if isinstance(resolved, nn.ModuleList):
+        return resolved
+    raise TypeError(f"Expected nn.ModuleList at '{dotted_path}', got {type(resolved).__name__}")
+def _is_cuda_malloc_async_backend() -> bool:
+    """Report whether CUDA is available and its allocator backend is ``cudaMallocAsync``.
+    Why this matters for cross-stream prefetch: ``cudaMallocAsync`` keeps a
+    pool *per stream* and never reuses freed blocks across streams without
+    explicit ordering, so allocating on the prefetch stream and freeing on
+    the compute stream makes the reserved pool grow without bound. The native
+    caching allocator handles that case with ``record_stream`` and reuses
+    blocks freely.
+    ComfyUI launchers commonly set
+    ``PYTORCH_CUDA_ALLOC_CONF=backend:cudaMallocAsync``; standalone Python
+    runs typically don't.
+    Any exception raised while querying is treated as "not cudaMallocAsync".
+    """
+    try:
+        if not torch.cuda.is_available():
+            return False
+        return torch.cuda.get_allocator_backend() == "cudaMallocAsync"
+    except Exception:
+        return False
+def _audit_lazy_state(
+    model: nn.Module,
+    target_device: torch.device,
+    managed_tensor_ids: set[int],
+) -> int:
+    """Relocate stray parameters/buffers of ``model`` onto ``target_device``.
+    Meant to run after the first forward, when lazily created buffers (e.g. an
+    attention mask cache) may have materialised on another device. Every
+    parameter or buffer that is not listed in ``managed_tensor_ids`` and is
+    not already on ``target_device`` has its ``.data`` moved there and stays
+    there. Tensors listed as managed are left alone because the offload store
+    rotates them between pinned CPU memory and the accelerator on purpose.
+    NOTE(review): the skip test uses ``id()`` of the objects yielded by
+    ``model.parameters()`` / ``model.buffers()``, so ``managed_tensor_ids``
+    must be built from those same objects — confirm against the caller.
+    Returns:
+        The number of tensors that were moved.
+    """
+    relocated = 0
+    for candidate in itertools.chain(model.parameters(), model.buffers()):
+        if id(candidate) in managed_tensor_ids or candidate.device == target_device:
+            continue
+        candidate.data = candidate.data.to(target_device)
+        relocated += 1
+    return relocated
+class _LayerStore:
+    """Holds CPU-pinned copies of every parameter/buffer of every offloaded layer.
+    Tracks which layers currently reside on GPU so the prefetcher and evictor
+    can make correct decisions in async mode. In sync mode the bookkeeping is
+    free overhead.
+    Construction rebinds each layer tensor's ``.data`` to its pinned copy, so
+    the layers passed in are modified in place.
+    """
+    def __init__(self, layers: nn.ModuleList, target_device: torch.device) -> None:
+        """Pin every parameter and buffer of ``layers`` and index the copies per layer."""
+        self.target_device = target_device
+        self.num_layers = len(layers)
+        # ``Tensor.pin_memory()`` defaults to CUDA; XPU needs an explicit
+        # device kind so the host buffer is registered with the right driver.
+        self._pin_device = target_device.type
+        # One ``{name: pinned tensor}`` dict per layer, in layer order.
+        self._pinned: list[dict[str, torch.Tensor]] = []
+        # Indices of layers whose tensors currently point at ``target_device``.
+        self._on_gpu: set[int] = set()
+        for layer in layers:
+            pinned: dict[str, torch.Tensor] = {}
+            for name, tensor in itertools.chain(layer.named_parameters(), layer.named_buffers()):
+                pinned_tensor = tensor.data.pin_memory(device=self._pin_device)
+                # The layer now aliases the pinned copy instead of its original storage.
+                tensor.data = pinned_tensor
+                pinned[name] = pinned_tensor
+            self._pinned.append(pinned)
+    def _check_idx(self, idx: int) -> None:
+        """Raise ``IndexError`` unless ``0 <= idx < num_layers``."""
+        if idx < 0 or idx >= self.num_layers:
+            raise IndexError(f"Layer index {idx} out of range [0, {self.num_layers})")
+    def is_on_gpu(self, idx: int) -> bool:
+        """Return whether layer *idx* is currently recorded as resident on ``target_device``."""
+        return idx in self._on_gpu
+    def move_to_gpu(self, idx: int, layer: nn.Module, *, non_blocking: bool = False) -> None:
+        """Move layer *idx* parameters from pinned CPU to ``target_device``.
+        No-op if the layer is already recorded as resident. Tensors of
+        ``layer`` that have no pinned copy in the store are left untouched.
+        """
+        self._check_idx(idx)
+        if idx in self._on_gpu:
+            return
+        pinned = self._pinned[idx]
+        for name, param in itertools.chain(layer.named_parameters(), layer.named_buffers()):
+            if name in pinned:
+                param.data = pinned[name].to(self.target_device, non_blocking=non_blocking)
+        self._on_gpu.add(idx)
+    def evict_to_cpu(self, idx: int, layer: nn.Module) -> None:
+        """Swap layer *idx* parameters back to their pinned CPU copies.
+        No device-to-host copy is issued: ``.data`` is simply rebound to the
+        pinned tensor kept by the store. No-op if the layer is not resident.
+        """
+        self._check_idx(idx)
+        if idx not in self._on_gpu:
+            return
+        pinned = self._pinned[idx]
+        for name, param in itertools.chain(layer.named_parameters(), layer.named_buffers()):
+            if name in pinned:
+                param.data = pinned[name]
+        self._on_gpu.discard(idx)
+    def managed_tensor_ids(self) -> set[int]:
+        """Return ``id()`` of every pinned tensor held by the store.
+        NOTE(review): these are ids of the pinned tensors, not of the layers'
+        ``Parameter`` / buffer objects — confirm consumers compare against the
+        same objects.
+        """
+        ids: set[int] = set()
+        for pinned in self._pinned:
+            for t in pinned.values():
+                ids.add(id(t))
+        return ids
+    def cleanup(self) -> None:
+        """Drop the pinned-tensor refs so they can be freed by the GC."""
+        for pinned_dict in self._pinned:
+            pinned_dict.clear()
+        self._pinned.clear()
+        self._on_gpu.clear()
+class _AsyncPrefetcher:
+    """Issues H2D transfers on a dedicated accelerator stream.
+    Uses per-layer events so that the compute stream only waits for the
+    specific layer it needs, not all pending transfers. The stream and event
+    types come from the accelerator namespace matching the store's
+    ``target_device``.
+    """
+    def __init__(self, store: _LayerStore, layers: nn.ModuleList) -> None:
+        """Create the transfer stream on ``store.target_device``."""
+        self._store = store
+        self._layers = layers
+        self._accel = _accel(store.target_device)
+        self._stream = self._accel.Stream(device=store.target_device)
+        # Layer index -> event recorded after that layer's copies were enqueued.
+        self._events: dict[int, Any] = {}
+    def prefetch(self, idx: int) -> None:
+        """Begin async transfer of layer *idx* to GPU (no-op if already there).
+        Also a no-op when a transfer for *idx* is already pending.
+        """
+        if self._store.is_on_gpu(idx) or idx in self._events:
+            return
+        with self._accel.stream(self._stream):
+            self._store.move_to_gpu(idx, self._layers[idx], non_blocking=True)
+            # Recorded after the copies so waiters see the completed transfer.
+            event = self._accel.Event()
+            event.record(self._stream)
+            self._events[idx] = event
+    def wait(self, idx: int) -> None:
+        """Make the current stream wait until layer *idx*'s transfer completes.
+        The pending event is consumed; without one the call does nothing.
+        """
+        event = self._events.pop(idx, None)
+        if event is not None:
+            self._accel.current_stream(self._store.target_device).wait_event(event)
+    def cleanup(self) -> None:
+        """Drop the references to events, stream, layers, store and namespace.
+        No synchronization is performed here; the instance must not be used
+        afterwards because its attributes are set to ``None``.
+        """
+        self._events.clear()
+        self._stream = None
+        self._layers = None
+        self._store = None
+        self._accel = None
+class LayerOffloadWrapper(nn.Module):
+    """Wraps a model to offload its sequential layers between CPU and GPU.
+    Each layer is evicted immediately after its forward completes. With
+    ``prefetch_count == 0`` the wrapper runs in synchronous mode (one layer
+    on GPU at a time, no extra stream). With ``prefetch_count >= 1`` it
+    pre-stages the next layers on a dedicated CUDA stream so H2D overlaps
+    compute, with up to ``1 + prefetch_count`` layers resident on GPU.
+    Parameters
+    ----------
+    model:
+        The model to wrap, with all parameters on **CPU** and in eval mode.
+    layers_attr:
+        Dotted attribute path to the ``nn.ModuleList`` of sequential layers
+        (e.g. ``"transformer_blocks"`` or ``"language_model.model.layers"``).
+    target_device:
+        The accelerator device to use for compute (CUDA or XPU). CPU / MPS
+        are rejected.
+    prefetch_count:
+        ``0`` = synchronous (per-layer load/evict, lowest VRAM, slowest).
+        ``>= 1`` = async prefetch this many layers ahead (faster, more VRAM).
+    Raises
+    ------
+    ValueError
+        If ``prefetch_count`` is negative.
+    RuntimeError
+        If ``model`` is in training mode.
+    """
+    def __init__(
+        self,
+        model: nn.Module,
+        layers_attr: str,
+        target_device: torch.device,
+        prefetch_count: int = 0,
+    ) -> None:
+        super().__init__()
+        _require_accelerator(target_device)
+        if prefetch_count < 0:
+            raise ValueError("prefetch_count must be >= 0")
+        if model.training:
+            raise RuntimeError(
+                "LayerOffloadWrapper only supports inference; the per-forward "
+                "evict-to-CPU step destroys gradient flow. Call model.eval() first."
+            )
+        self._model = model
+        self._layers = _resolve_attr(model, layers_attr)
+        self._target_device = target_device
+        self._accel = _accel(target_device)
+        # Clamp: no point prefetching more layers than (num_layers - 1).
+        max_prefetch = max(len(self._layers) - 1, 0)
+        self._prefetch_count = min(prefetch_count, max_prefetch)
+        self._async_mode = self._prefetch_count >= 1
+        # ``cudaMallocAsync`` keeps per-stream memory pools and never reuses
+        # freed blocks across streams without explicit ordering. Detect the
+        # backend at construction time so the hooks can pick the right
+        # alloc/free pairing strategy: native allocator → record_stream
+        # (fast, frees go to whatever stream is current); cudaMallocAsync →
+        # wait_stream + free on prefetch stream (correct, slightly more
+        # serialized). Only meaningful for CUDA; XPU always uses the native
+        # caching allocator and takes the record_stream fast path.
+        self._cuda_malloc_async = target_device.type == "cuda" and _is_cuda_malloc_async_backend()
+        if self._async_mode:
+            logger.info(
+                "LayerOffloadWrapper: async prefetch enabled (prefetch_count=%d, allocator=%s, free_path=%s)",
+                self._prefetch_count,
+                "cudaMallocAsync" if self._cuda_malloc_async else "native",
+                "prefetch-stream + wait_stream" if self._cuda_malloc_async else "compute-stream + record_stream",
+            )
+        self._hooks: list[torch.utils.hooks.RemovableHandle] = []
+        self._audit_handle: torch.utils.hooks.RemovableHandle | None = None
+        self._prefetcher: _AsyncPrefetcher | None = None
+        _log_vram("wrapper.__init__: pre-setup", target_device, reset_peak=True)
+        self._setup()
+        _log_vram(
+            f"wrapper.__init__: post-setup (async={self._async_mode}, "
+            f"prefetch={self._prefetch_count}, layers={len(self._layers)})",
+            target_device,
+        )
+    # ------------------------------------------------------------------
+    # Setup / teardown
+    # ------------------------------------------------------------------
+    def _setup(self) -> None:
+        """Build the layer store, place non-layer tensors, and install all hooks."""
+        # 1. Pin all layer tensors in CPU memory.
+        self._store = _LayerStore(self._layers, self._target_device)
+        # 2. Move all NON-layer params/buffers to GPU permanently.
+        layer_tensor_ids: set[int] = set()
+        for layer in self._layers:
+            for t in itertools.chain(layer.parameters(), layer.buffers()):
+                layer_tensor_ids.add(id(t))
+        for p in self._model.parameters():
+            if id(p) not in layer_tensor_ids:
+                p.data = p.data.to(self._target_device)
+        for b in self._model.buffers():
+            if id(b) not in layer_tensor_ids:
+                b.data = b.data.to(self._target_device)
+        # 3. In async mode: pre-load the first (1 + prefetch_count) layers and
+        #    spin up the prefetch stream.
+        if self._async_mode:
+            for idx in range(min(self._prefetch_count + 1, len(self._layers))):
+                self._store.move_to_gpu(idx, self._layers[idx])
+            self._prefetcher = _AsyncPrefetcher(self._store, self._layers)
+        # 4. Register layer load/evict hooks.
+        self._register_hooks()
+        # 5. One-shot audit: catch lazy params/buffers materialised inside the
+        #    first forward (RoPE caches, attention masks, etc.) that escaped
+        #    the construction-time scan.
+        self._audit_handle = self._model.register_forward_hook(self._audit_first_forward)
+    def _register_hooks(self) -> None:
+        """Attach a load pre-hook and an evict post-hook to every offloaded layer."""
+        idx_map: dict[int, int] = {id(layer): idx for idx, layer in enumerate(self._layers)}
+        num_layers = len(self._layers)
+        def _pre_hook(module: nn.Module, _args: Any, *, idx: int) -> None:
+            if self._async_mode:
+                # Wait only for THIS layer's H2D transfer.
+                self._prefetcher.wait(idx)  # type: ignore[union-attr]
+                if not self._store.is_on_gpu(idx):
+                    self._store.move_to_gpu(idx, module)
+                if not self._cuda_malloc_async:
+                    # Native caching allocator fast path: tell the allocator
+                    # the compute stream will read these weights so it does
+                    # not reuse the blocks while the kernel is still running.
+                    # Frees in _post_hook go to whatever stream is current
+                    # (compute stream) and the allocator handles cross-stream
+                    # reuse internally — no prefetch-stream barrier needed.
+                    compute_stream = self._accel.current_stream(self._target_device)
+                    for param in itertools.chain(module.parameters(), module.buffers()):
+                        param.data.record_stream(compute_stream)
+                # Kick off prefetch for upcoming layers (wraps around for next pass).
+                for offset in range(1, self._prefetch_count + 1):
+                    self._prefetcher.prefetch((idx + offset) % num_layers)  # type: ignore[union-attr]
+            else:
+                # Sync mode: the H2D dispatches on the compute stream itself,
+                # which serialises naturally with the kernel that follows.
+                self._store.move_to_gpu(idx, module, non_blocking=True)
+        def _post_hook(module: nn.Module, _args: Any, _output: Any, *, idx: int) -> None:
+            if self._async_mode and self._cuda_malloc_async:
+                # cudaMallocAsync slow-but-safe path: per-stream pools
+                # require alloc and free on the same stream. Since
+                # `_AsyncPrefetcher` allocates layer weights on the prefetch
+                # stream, we must also free them there. Wait for the compute
+                # stream to finish reading the weights first; wait_stream is
+                # host-async so it does not stall Python. The cost is that
+                # subsequent prefetches queued on the prefetch stream are
+                # ordered after this wait, slightly reducing pipeline depth.
+                prefetch_stream = self._prefetcher._stream  # type: ignore[union-attr]
+                compute_stream = self._accel.current_stream(self._target_device)
+                prefetch_stream.wait_stream(compute_stream)
+                with self._accel.stream(prefetch_stream):
+                    self._store.evict_to_cpu(idx, module)
+            else:
+                # Native allocator path: just drop the GPU tensor refs on
+                # the compute stream. record_stream in _pre_hook ensures the
+                # blocks are not reused before the kernel finishes.
+                self._store.evict_to_cpu(idx, module)
+        for layer in self._layers:
+            idx = idx_map[id(layer)]
+            # functools.partial binds idx per layer, avoiding the late-binding
+            # closure pitfall of capturing the loop variable.
+            h1 = layer.register_forward_pre_hook(functools.partial(_pre_hook, idx=idx))
+            h2 = layer.register_forward_hook(functools.partial(_post_hook, idx=idx))
+            self._hooks.extend([h1, h2])
+    def _audit_first_forward(self, _module: nn.Module, _inputs: Any, _outputs: Any) -> None:
+        """Forward hook that runs the lazy-state audit once, then removes itself."""
+        _log_vram("wrapper.audit: pre", self._target_device)
+        moved = _audit_lazy_state(self._model, self._target_device, self._store.managed_tensor_ids())
+        if moved:
+            logger.warning(
+                "LayerOffloadWrapper: moved %d lazy param(s)/buffer(s) onto %s after "
+                "the first forward. These will stay on GPU; they are not offloaded.",
+                moved,
+                self._target_device,
+            )
+        _log_vram(f"wrapper.audit: post (moved={moved})", self._target_device)
+        if self._audit_handle is not None:
+            self._audit_handle.remove()
+            self._audit_handle = None
+    def teardown(self) -> None:
+        """Remove hooks, release pinned memory, and move parameters back to CPU.
+        After this call the wrapper is inert: hooks are removed, the prefetch
+        stream is drained and destroyed, all parameters reside on regular
+        (non-pinned) CPU memory, and the :class:`_LayerStore` pinned-tensor
+        cache is cleared.
+        """
+        _log_vram(
+            f"wrapper.teardown: enter (on_gpu={len(self._store._on_gpu)}, "
+            f"events={len(self._prefetcher._events) if self._prefetcher is not None else 0})",
+            self._target_device,
+        )
+        for h in self._hooks:
+            h.remove()
+        self._hooks.clear()
+        if self._audit_handle is not None:
+            self._audit_handle.remove()
+            self._audit_handle = None
+        # Drain in-flight H2D copies before tearing down stream resources, or
+        # the accelerator driver can hit use-after-free during cleanup.
+        self._accel.synchronize(device=self._target_device)
+        if self._prefetcher is not None:
+            self._prefetcher.cleanup()
+            self._prefetcher = None
+        for idx, layer in enumerate(self._layers):
+            self._store.evict_to_cpu(idx, layer)
+        # NOTE(review): evict_to_cpu re-points layer tensors at the store's
+        # cached copies, and ``.to("cpu")`` is a no-op for tensors already on
+        # CPU — so layer parameters may still reference the pinned buffers
+        # after this loop, contrary to the "non-pinned" claim in the
+        # docstring. Confirm against _LayerStore before relying on it.
+        for p in self._model.parameters():
+            p.data = p.data.to("cpu")
+        for b in self._model.buffers():
+            b.data = b.data.to("cpu")
+        self._store.cleanup()
+        _log_vram("wrapper.teardown: exit (pre-empty_cache)", self._target_device)
+    # ------------------------------------------------------------------
+    # Forward and attribute delegation
+    # ------------------------------------------------------------------
+    def forward(self, *args: Any, **kwargs: Any) -> Any:
+        """Run the wrapped model; the per-layer hooks do the load/evict work."""
+        return self._model(*args, **kwargs)
+    def __getattr__(self, name: str) -> Any:
+        """Proxy attribute access to the wrapped model.
+        ``nn.Module.__getattr__`` is only called when normal lookup fails.
+        Plain attributes (``_store``, ``_hooks``, ...) live in ``__dict__``;
+        ``_model`` is a registered submodule and is found by
+        ``nn.Module.__getattr__`` itself. Anything still missing is looked up
+        on the wrapped model.
+        """
+        try:
+            return super().__getattr__(name)
+        except AttributeError:
+            # If ``_model`` itself is missing the wrapper is not (fully)
+            # initialised; delegating would look up ``self._model`` again and
+            # recurse until RecursionError. Surface the AttributeError instead.
+            if name == "_model":
+                raise
+            return getattr(self._model, name)
--- a/SenseNova-U1/src/sensenova_u1/utils/lora.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/lora.py
+import torch
+import torch.nn as nn
+from safetensors.torch import safe_open
+def build_lora_names(key, lora_down_key, lora_up_key, is_native_weight):
+    """Derive the LoRA state-dict names that belong to model parameter ``key``.
+    Args:
+        key: Parameter name from ``model.named_parameters()``.
+        lora_down_key: Suffix that replaces the trailing ``".weight"`` for the down factor.
+        lora_up_key: Suffix that replaces the trailing ``".weight"`` for the up factor.
+        is_native_weight: When true, every name is prefixed with ``"diffusion_model."``.
+    Returns:
+        ``(lora_down, lora_up, lora_alpha)`` names. For a ``key`` that does
+        not end in ``".weight"`` all three are the (prefixed) key unchanged.
+    """
+    base = "diffusion_model." if is_native_weight else ""
+    suffix = ".weight"
+    if not key.endswith(suffix):
+        # Not a weight parameter (e.g. a bias): nothing to substitute.
+        unchanged = base + key
+        return unchanged, unchanged, unchanged
+    # Swap only the trailing ".weight". str.replace would also rewrite an
+    # earlier occurrence (e.g. a submodule called "weight_proj") and produce
+    # names that can never match the LoRA checkpoint.
+    stem = base + key[: -len(suffix)]
+    lora_down = stem + lora_down_key
+    lora_up = stem + lora_up_key
+    lora_alpha = stem + ".alpha"
+    return lora_down, lora_up, lora_alpha
+def load_and_merge_lora_weight(
+    model: nn.Module,
+    lora_state_dict: dict,
+    lora_down_key: str = ".lora_down.weight",
+    lora_up_key: str = ".lora_up.weight",
+):
+    """Merge LoRA deltas from ``lora_state_dict`` into ``model`` in place.
+    For every ``*.weight`` parameter that has a matching down factor, adds
+    ``(alpha / rank) * (up @ down)`` to the parameter, keeping the
+    parameter's original dtype. ``rank`` is the first dimension of the down
+    factor.
+    Args:
+        model: Module whose parameters are updated.
+        lora_state_dict: Mapping of LoRA tensor names to tensors.
+        lora_down_key: Name suffix of the down factor.
+        lora_up_key: Name suffix of the up factor.
+    Returns:
+        The same ``model`` object.
+    Raises:
+        KeyError: If a down factor is present but its up factor or alpha is missing.
+    """
+    is_native_weight = any("diffusion_model." in key for key in lora_state_dict)
+    for key, value in model.named_parameters():
+        if not key.endswith(".weight"):
+            # Only weights have LoRA factors. Without this guard a
+            # non-weight name maps to itself and could be mistaken for a
+            # down factor if the checkpoint happens to contain that key.
+            continue
+        lora_down_name, lora_up_name, lora_alpha_name = build_lora_names(
+            key, lora_down_key, lora_up_key, is_native_weight
+        )
+        if lora_down_name not in lora_state_dict:
+            continue
+        # Merge in float32 regardless of how the checkpoint was saved. This
+        # replaces dtype ``assert``s, which vanish under ``python -O`` and
+        # rejected half-precision LoRA files outright.
+        lora_down = lora_state_dict[lora_down_name].to(torch.float32)
+        lora_up = lora_state_dict[lora_up_name].to(torch.float32)
+        lora_alpha = float(lora_state_dict[lora_alpha_name])
+        rank = lora_down.shape[0]
+        scaling_factor = lora_alpha / rank
+        delta_W = scaling_factor * torch.matmul(lora_up, lora_down).to(value.device)
+        # The sum may be promoted to float32; cast back to the parameter dtype.
+        value.data = (value.data + delta_W).type_as(value.data)
+    return model
+def load_and_merge_lora_weight_from_safetensors(
+    model: nn.Module,
+    lora_weight_path: str,
+    lora_down_key: str = ".lora_down.weight",
+    lora_up_key: str = ".lora_up.weight",
+):
+    """Read every tensor of a safetensors file onto CPU and merge it into ``model``.
+    Thin file-loading front end for :func:`load_and_merge_lora_weight`;
+    returns whatever that function returns.
+    """
+    with safe_open(lora_weight_path, framework="pt", device="cpu") as handle:
+        tensors = {name: handle.get_tensor(name) for name in handle.keys()}
+    return load_and_merge_lora_weight(model, tensors, lora_down_key, lora_up_key)
--- a/SenseNova-U1/src/sensenova_u1/utils/offload.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/offload.py
+"""Context managers for CPU<->GPU layer offload during inference.
+Wraps :class:`LayerOffloadWrapper` from :mod:`.layer_offload` so callers can
+enter a ``with`` block, run generation through the wrapped model, and have
+the wrapper torn down + host pinned-memory cache released on exit.
+"""
+from __future__ import annotations
+import contextlib
+import gc
+import logging
+from collections.abc import Iterator
+from contextlib import AbstractContextManager, contextmanager
+from typing import TypeVar
+import torch
+from torch import nn
+from . import accel
+from .layer_offload import LayerOffloadWrapper
+LOGGER = logging.getLogger(__name__)
+# Type variable for the wrapped model, bounded to nn.Module.
+_M = TypeVar("_M", bound=nn.Module)
+# Accepted vram_mode names; quoted in the error raised for an unknown mode.
+VRAM_MODE_OPTIONS: tuple[str, ...] = ("full", "low", "balanced")
+DEFAULT_VRAM_MODE: str = "full"
+# vram_mode -> prefetch_count as interpreted by make_offload_ctx:
+# 0 = no offload, 1 = synchronous offload, >= 2 = async prefetch.
+_VRAM_MODE_TO_PREFETCH: dict[str, int] = {
+    "full": 0,
+    "low": 1,
+    "balanced": 2,
+}
+# Default dotted path to the layer list, used by make_offload_ctx.
+DEFAULT_LAYERS_ATTR: str = "language_model.model.layers"
+def vram_mode_to_prefetch_count(mode: str) -> int:
+    """Translate a ``--vram_mode`` choice into a layer-offload ``prefetch_count``.
+    ``0`` keeps the model fully on GPU (no offload), ``1`` selects the
+    synchronous per-layer swap, and ``>=2`` selects async prefetch.
+    Raises ``ValueError`` for a mode that is not in the mapping.
+    """
+    if mode in _VRAM_MODE_TO_PREFETCH:
+        return _VRAM_MODE_TO_PREFETCH[mode]
+    raise ValueError(f"Unsupported vram_mode={mode!r}. Choose one of {VRAM_MODE_OPTIONS}.")
+def make_offload_ctx(
+    model: nn.Module,
+    prefetch_count: int,
+    target_device: str | torch.device,
+    layers_attr: str = DEFAULT_LAYERS_ATTR,
+) -> AbstractContextManager[nn.Module]:
+    """Build the offload context manager that matches ``prefetch_count``.
+    * ``0``: pass-through context that yields ``model`` unchanged.
+    * ``1``: synchronous offload context (one resident layer).
+    * anything else: async prefetch context with that many layers ahead.
+    ``target_device`` may be a device string; it is converted to a
+    ``torch.device`` before an offload context is created.
+    """
+    if prefetch_count == 0:
+        return contextlib.nullcontext(model)
+    if isinstance(target_device, torch.device):
+        target = target_device
+    else:
+        target = torch.device(target_device)
+    if prefetch_count != 1:
+        return offload_layers_async(model, layers_attr, target, prefetch_count=prefetch_count)
+    return offload_layers_sync(model, layers_attr, target)
+def _cleanup_memory() -> None:
+    """Run a Python GC pass, then empty the accelerator cache and synchronize."""
+    # Collect first so tensors that are only garbage no longer hold memory
+    # when the cache is emptied.
+    gc.collect()
+    accel.empty_cache()
+    accel.synchronize()
+def _log_vram(label: str, target_device: torch.device) -> None:
+    """Emit one INFO line with allocated / reserved / peak VRAM in GiB.
+    Diagnostic aid for the ComfyUI-only VRAM growth under
+    ``vram_mode='balanced'``. Silently returns for unsupported or
+    unavailable device types; any failure is logged at DEBUG and swallowed.
+    """
+    try:
+        kind = target_device.type
+        if kind not in accel.SUPPORTED_DEVICE_TYPES or not accel.is_available(kind):
+            return
+        backend = accel.accel_module(target_device)
+        gib = 1024**3
+        LOGGER.info(
+            "[offload vram] %-40s | alloc=%6.2f GiB  reserved=%6.2f GiB  peak=%6.2f GiB",
+            label,
+            backend.memory_allocated(target_device) / gib,
+            backend.memory_reserved(target_device) / gib,
+            backend.max_memory_allocated(target_device) / gib,
+        )
+    except Exception as exc:  # pragma: no cover - diagnostic only
+        LOGGER.debug("offload vram log %r failed: %s", label, exc)
+def _empty_host_cache(target_device: torch.device) -> None:
+    """Ask PyTorch to drop its cached pinned host memory.
+    Repeated offload runs otherwise exhaust host memory over time, because
+    the CachingHostAllocator keeps freed pinned blocks around. The host
+    cache is global (not per-backend); the active accelerator is still
+    synchronized first so in-flight H2D copies don't reference freed blocks.
+    Does nothing for unsupported or unavailable device types; failures are
+    logged as warnings, never raised.
+    """
+    kind = target_device.type
+    usable = kind in accel.SUPPORTED_DEVICE_TYPES and accel.is_available(kind)
+    if not usable:
+        return
+    try:
+        accel.synchronize(target_device)
+        # Private hook: only present on PyTorch builds that expose it.
+        release = getattr(torch._C, "_host_emptyCache", None)
+        if release is not None:
+            release()
+    except Exception as exc:  # pragma: no cover - best-effort cleanup
+        LOGGER.warning("offload: host cache release failed: %s", exc)
+@contextmanager
+def _offload_layers(
+    model: _M,
+    layers_attr: str,
+    target_device: torch.device,
+    prefetch_count: int,
+) -> Iterator[nn.Module]:
+    """Wrap ``model`` in a :class:`LayerOffloadWrapper` for the ``with`` body.
+    Yields the wrapper. On exit, including when the body or the wrapper
+    construction raises, the wrapper is torn down if it was built, the model
+    is moved back to CPU, and the accelerator and pinned-host caches are
+    released. Teardown and ``model.to('cpu')`` failures are logged as
+    warnings so the remaining cleanup steps still run.
+    """
+    wrapper: LayerOffloadWrapper | None = None
+    try:
+        # Construct inside the ``try``: the constructor already moves tensors
+        # onto the accelerator, so a failure midway (e.g. out of memory) must
+        # still reach the cleanup below instead of leaving them stranded.
+        wrapper = LayerOffloadWrapper(
+            model,
+            layers_attr=layers_attr,
+            target_device=target_device,
+            prefetch_count=prefetch_count,
+        )
+        yield wrapper
+    finally:
+        if wrapper is not None:
+            try:
+                wrapper.teardown()
+            except Exception as exc:  # pragma: no cover
+                LOGGER.warning("offload: teardown failed: %s", exc)
+        try:
+            model.to("cpu")
+        except Exception as exc:  # pragma: no cover
+            LOGGER.warning("offload: model.to('cpu') failed: %s", exc)
+        _log_vram("offload._offload_layers: pre-empty_cache", target_device)
+        _cleanup_memory()
+        _log_vram("offload._offload_layers: post-empty_cache", target_device)
+        _empty_host_cache(target_device)
+        _log_vram("offload._offload_layers: post-host_empty_cache", target_device)
+def offload_layers_sync(
+    model: _M,
+    layers_attr: str,
+    target_device: torch.device,
+) -> AbstractContextManager[nn.Module]:
+    """Context manager for synchronous CPU<->GPU layer offload (lowest memory, slowest).
+    Every offloaded layer is loaded right before its forward and evicted
+    right after, so exactly one layer's weights are resident on GPU.
+    """
+    return _offload_layers(
+        model=model,
+        layers_attr=layers_attr,
+        target_device=target_device,
+        prefetch_count=0,
+    )
+def offload_layers_async(
+    model: _M,
+    layers_attr: str,
+    target_device: torch.device,
+    prefetch_count: int = 2,
+) -> AbstractContextManager[nn.Module]:
+    """Context manager for async-prefetch layer offload (more memory, faster).
+    ``prefetch_count`` is the number of layers prefetched ahead on a
+    dedicated CUDA stream. Raises ``ValueError`` when it is below 1.
+    """
+    if prefetch_count < 1:
+        raise ValueError("prefetch_count must be >= 1 for async offload")
+    return _offload_layers(
+        model=model,
+        layers_attr=layers_attr,
+        target_device=target_device,
+        prefetch_count=prefetch_count,
+    )
--- a/SenseNova-U1/src/sensenova_u1/utils/param_count.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/param_count.py
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from typing import Iterable
+import torch
+from transformers import AutoConfig, AutoModel
+from sensenova_u1 import check_checkpoint_compatibility
+@dataclass(frozen=True)
+class GroupRule:
+    """Rule that assigns matching parameter names to the group ``name``.
+    Evaluated by ``_rule_matches``; a rule with neither ``prefixes`` nor
+    ``contains`` never matches.
+    """
+    # Group label returned when the rule matches.
+    name: str
+    # When non-empty, the parameter name must start with one of these.
+    prefixes: tuple[str, ...] = ()
+    # When non-empty, the parameter name must contain one of these substrings.
+    contains: tuple[str, ...] = ()
+    # The rule is rejected if the parameter name contains any of these substrings.
+    excludes: tuple[str, ...] = ()
+@dataclass(frozen=True)
+class ParamEntry:
+    """Size record for one named parameter, as built by ``ModelParamInspector.count``."""
+    # Parameter name from ``named_parameters()``.
+    name: str
+    # Number of elements.
+    numel: int
+    # Dtype string with the "torch." prefix removed.
+    dtype: str
+    # ``numel`` multiplied by the parameter's element size.
+    bytes: int
+@dataclass(frozen=True)
+class ParamGroupStat:
+    """Aggregated totals for one parameter group."""
+    # Group label produced by ``infer_group``.
+    name: str
+    # Sum of ``numel`` over the group's parameters.
+    params: int
+    # Sum of ``numel`` over the group's parameters that have ``requires_grad`` set.
+    trainable_params: int
+    # Sum of the per-parameter byte sizes in the group.
+    bytes: int
+    # Per-parameter records; ``ModelParamInspector.count`` stores them largest first.
+    entries: tuple[ParamEntry, ...]
+@dataclass(frozen=True)
+class ParamCountResult:
+    """Whole-model totals plus the per-group breakdown returned by ``ModelParamInspector.count``."""
+    # Path the inspected model was loaded from.
+    model_path: str
+    # Element count over all distinct parameters.
+    total_params: int
+    # Element count over parameters that have ``requires_grad`` set.
+    trainable_params: int
+    # Byte size over all distinct parameters.
+    total_bytes: int
+    # Group statistics; ``count`` orders them by parameter count, largest first.
+    groups: tuple[ParamGroupStat, ...]
+# NOTE on architecture (SenseNova-U1, MoT):
+#   * vision_model.*                       -> visual understanding
+#   * fm_modules.*                         -> generation-only (visual generation, fm_head, timestep/noise embedders)
+#   * language_model.* w/ "_mot_gen"       -> generation expert inside the LLM backbone
+#   * language_model.* w/o "_mot_gen"      -> understanding expert inside the LLM backbone
+#   * language_model.model.embed_tokens.*  -> token input embedding, used by every text token in both
+#                                             pathways (image-gen still embeds the text prompt)
+#   * language_model.lm_head.*             -> text-token output projection. Also exercised by the
+#                                             generation pathway because t2i-reasoning runs a
+#                                             thinking phase that emits text tokens before image
+#                                             tokens. Hence both belong to the "shared" group.
+# Order matters: infer_group returns the first rule that matches. The
+# "_mot_gen" and "shared" rules must therefore precede the catch-all
+# ``language_model`` rule at the end, or it would claim their parameters.
+DEFAULT_GROUPS: tuple[GroupRule, ...] = (
+    GroupRule("generation_transformer", prefixes=("fm_modules",)),
+    GroupRule(
+        "generation_transformer",
+        prefixes=("language_model",),
+        contains=("_mot_gen",),
+    ),
+    GroupRule(
+        "shared",
+        prefixes=(
+            "language_model.model.embed_tokens",
+            "language_model.lm_head",
+        ),
+    ),
+    GroupRule("understanding_transformer", prefixes=("vision_model",)),
+    GroupRule("understanding_transformer", prefixes=("language_model",)),
+)
+def format_param_count(n: int) -> str:
+    """Render a parameter count with an SI suffix (K, M, or B for 1e9).
+    Uses the largest unit whose base does not exceed ``abs(n)`` and three
+    decimals; values below 1000 are returned as plain ``str(n)``.
+    """
+    magnitude = abs(n)
+    if magnitude >= 1_000_000_000:
+        return f"{n / 1_000_000_000:.3f}B"
+    if magnitude >= 1_000_000:
+        return f"{n / 1_000_000:.3f}M"
+    if magnitude >= 1_000:
+        return f"{n / 1_000:.3f}K"
+    return str(n)
+def format_bytes(n: int) -> str:
+    """Render a byte count in decimal SI units (KB = 1e3, MB = 1e6, GB = 1e9).
+    Uses the largest unit that fits with three decimals; anything below
+    1000 is shown as an integer followed by ``B``.
+    """
+    scales = {"GB": 1_000_000_000, "MB": 1_000_000, "KB": 1_000}
+    label = next((unit for unit, size in scales.items() if abs(n) >= size), None)
+    if label is None:
+        return f"{n}B"
+    return f"{n / scales[label]:.3f}{label}"
+def build_rules(custom_groups_json: str | None = None) -> tuple[GroupRule, ...]:
+    """Return the grouping rules, optionally loaded from a JSON file.
+    With no path (``None`` or empty) the built-in ``DEFAULT_GROUPS`` are
+    returned. Otherwise the file must hold a JSON object mapping each group
+    name to a list of string prefixes; one prefix-only rule is created per
+    entry, in file order.
+    Raises ``ValueError`` when the document is not an object, or a group
+    name / prefix list has the wrong type.
+    """
+    if not custom_groups_json:
+        return DEFAULT_GROUPS
+    with open(custom_groups_json, "r", encoding="utf-8") as handle:
+        payload = json.load(handle)
+    if not isinstance(payload, dict):
+        raise ValueError("custom group config must be a JSON object")
+    parsed: list[GroupRule] = []
+    for group_name, prefixes in payload.items():
+        if not isinstance(group_name, str):
+            raise ValueError("group name must be string")
+        well_formed = isinstance(prefixes, list) and all(isinstance(x, str) for x in prefixes)
+        if not well_formed:
+            raise ValueError(f"group '{group_name}' prefixes must be list[str]")
+        parsed.append(GroupRule(name=group_name, prefixes=tuple(prefixes)))
+    return tuple(parsed)
+def _rule_matches(rule: GroupRule, param_name: str) -> bool:
+    """Decide whether ``rule`` applies to ``param_name``.
+    A rule needs at least one selector (``prefixes`` or ``contains``). Each
+    non-empty selector must be satisfied, and no ``excludes`` substring may
+    occur in the name.
+    """
+    if not (rule.prefixes or rule.contains):
+        # A rule without selectors never matches, whatever its excludes say.
+        return False
+    if rule.prefixes and not param_name.startswith(tuple(rule.prefixes)):
+        return False
+    if rule.contains and all(c not in param_name for c in rule.contains):
+        return False
+    excluded = bool(rule.excludes) and any(e in param_name for e in rule.excludes)
+    return not excluded
+def infer_group(param_name: str, rules: Iterable[GroupRule]) -> str:
+    """Return the group name for ``param_name``.
+    The first rule in ``rules`` that matches wins. When none matches, names
+    containing ``"embed"`` (case-insensitive) fall into ``"embedding_misc"``
+    and everything else into ``"other"``.
+    """
+    unmatched = object()
+    hit = next((rule.name for rule in rules if _rule_matches(rule, param_name)), unmatched)
+    if hit is not unmatched:
+        return hit
+    # "embedding" contains "embed", so the single substring test covers both.
+    return "embedding_misc" if "embed" in param_name.lower() else "other"
+class ModelParamInspector:
+    """Loads a checkpoint and tallies its parameters per rule-defined group."""
+    def __init__(
+        self,
+        model_path: str,
+        *,
+        dtype: torch.dtype = torch.bfloat16,
+    ) -> None:
+        """Load the model at ``model_path`` after a checkpoint compatibility check.
+        Args:
+            model_path: Path or identifier passed to ``from_pretrained``.
+            dtype: Passed to ``AutoModel.from_pretrained`` as ``torch_dtype``.
+        """
+        self.model_path = model_path
+        config = AutoConfig.from_pretrained(model_path)
+        check_checkpoint_compatibility(config)
+        self.model = AutoModel.from_pretrained(model_path, config=config, torch_dtype=dtype)
+    def count(self, rules: Iterable[GroupRule]) -> ParamCountResult:
+        """Count parameters and bytes, overall and per group.
+        Args:
+            rules: Grouping rules, tried in order for every parameter. Any
+                iterable is accepted, including a one-shot iterator.
+        Returns:
+            Totals plus one :class:`ParamGroupStat` per group, groups ordered
+            by parameter count (largest first) and each group's entries
+            ordered by element count (largest first).
+        """
+        # The rules are scanned once per parameter. Snapshot them so a
+        # generator is not exhausted after the first parameter, which would
+        # silently push every later parameter into the fallback groups.
+        rules = tuple(rules)
+        total_params = 0
+        trainable_params = 0
+        total_bytes = 0
+        group_to_params: dict[str, int] = {}
+        group_to_trainable: dict[str, int] = {}
+        group_to_bytes: dict[str, int] = {}
+        group_to_entries: dict[str, list[ParamEntry]] = {}
+        seen_param_ids: set[int] = set()
+        for name, param in self.model.named_parameters():
+            # Guard against counting the same parameter object twice.
+            param_id = id(param)
+            if param_id in seen_param_ids:
+                continue
+            seen_param_ids.add(param_id)
+            numel = int(param.numel())
+            # element_size() reflects the actual per-element byte width of this
+            # parameter, which is robust to mixed-dtype checkpoints (e.g. norms
+            # forced to fp32 even when the rest is loaded as bf16).
+            nbytes = numel * param.element_size()
+            group = infer_group(name, rules)
+            total_params += numel
+            total_bytes += nbytes
+            group_to_params[group] = group_to_params.get(group, 0) + numel
+            group_to_bytes[group] = group_to_bytes.get(group, 0) + nbytes
+            group_to_entries.setdefault(group, []).append(
+                ParamEntry(
+                    name=name,
+                    numel=numel,
+                    dtype=str(param.dtype).replace("torch.", ""),
+                    bytes=nbytes,
+                )
+            )
+            if param.requires_grad:
+                trainable_params += numel
+                group_to_trainable[group] = group_to_trainable.get(group, 0) + numel
+        groups = tuple(
+            ParamGroupStat(
+                name=k,
+                params=v,
+                trainable_params=group_to_trainable.get(k, 0),
+                bytes=group_to_bytes.get(k, 0),
+                entries=tuple(sorted(group_to_entries.get(k, []), key=lambda e: e.numel, reverse=True)),
+            )
+            for k, v in sorted(group_to_params.items(), key=lambda x: x[1], reverse=True)
+        )
+        return ParamCountResult(
+            model_path=self.model_path,
+            total_params=total_params,
+            trainable_params=trainable_params,
+            total_bytes=total_bytes,
+            groups=groups,
+        )
--- a/SenseNova-U1/src/sensenova_u1/utils/profiler.py
+++ b/SenseNova-U1/src/sensenova_u1/utils/profiler.py
+"""Inference timing profiler.
+Records model-load time and per-generation wall time (CUDA-synchronized so
+GPU launch overhead doesn't hide inside Python). ``report()`` prints a summary
+that also converts per-image time into per-token cost using a fixed image
+patch size (the model's generation patchification factor). For CUDA devices,
+it also records peak memory allocated/reserved during model load and each
+generation block.
+Intended for quick, human-readable profiling from CLI scripts under
+``examples/``. When ``enabled=False``, every context manager is a no-op and
+``report()`` prints nothing, so it can be wired in unconditionally.
+Typical usage::
+    from sensenova_u1.utils import InferenceProfiler
+    prof = InferenceProfiler(enabled=args.profile, device=args.device)
+    with prof.time_load():
+        engine = SenseNovaU1T2I(model_path)
+    with prof.time_generate(width=2048, height=2048, batch=1):
+        images = engine.generate(...)
+    prof.report()
+"""
+from __future__ import annotations
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import Iterator, List, Mapping
+import torch
+try:
+    import resource as _resource  # POSIX-only; Windows falls back to 0
+except ImportError:  # pragma: no cover - non-POSIX
+    _resource = None  # type: ignore[assignment]
+DEFAULT_IMAGE_PATCH_SIZE = 32
+def _process_rss_peak() -> int:
+    """Return process-wide peak resident set size in bytes (0 if unavailable).
+    ``ru_maxrss`` is a monotonic high-water mark since process start: it cannot
+    be reset, so per-block values reflect cumulative peak, not delta.
+    """
+    if _resource is None:
+        return 0
+    import sys  # local import keeps this function self-contained
+    rss = _resource.getrusage(_resource.RUSAGE_SELF).ru_maxrss
+    # Linux reports kilobytes; macOS reports bytes. Decide by platform: a
+    # magnitude threshold cannot tell a macOS byte count apart from a Linux
+    # kilobyte count and would inflate ordinary macOS readings 1024-fold.
+    if sys.platform == "darwin":
+        return rss
+    return rss * 1024
+@dataclass
+class _MemoryPeak:
+    """Peak-memory readings captured for one profiled block."""
+    allocated: int = 0
+    reserved: int = 0
+    cpu_rss: int = 0
+    # NOTE(review): looks like (device, allocated, reserved) per device — confirm against the producer.
+    by_device: tuple[tuple[str, int, int], ...] = ()
+    @property
+    def available(self) -> bool:
+        """``True`` when at least one of the scalar readings is positive."""
+        return any(amount > 0 for amount in (self.allocated, self.reserved, self.cpu_rss))
+@dataclass
+class _GenerationRecord:
+    """One recorded generation call: its dimensions, batch size, duration, and memory peak."""
+    width: int
+    height: int
+    batch: int
+    # NOTE(review): presumably the wall time of the timed block — confirm in time_generate.
+    seconds: float
+    memory_peak: _MemoryPeak
+@dataclass
+class GenerationHandle:
+    """Mutable handle yielded by :meth:`InferenceProfiler.time_generate`.
+    Callers may overwrite ``batch`` (and width/height) after the generate
+    call returns when the true count is only known post-hoc — e.g. interleave
+    inference, where one call produces a variable number of images.
+    """
+    # All three fields are plain mutable attributes; the dataclass is not frozen.
+    width: int
+    height: int
+    batch: int
+class InferenceProfiler:
+    """Minimal wall-clock profiler for model loading + generation.
+    Parameters
+    ----------
+    enabled : bool
+        If False, the timing context managers, :meth:`update_last_batch` and
+        :meth:`report` neither measure, record nor print anything.
+    device : str
+        E.g. ``"cuda"``, ``"cuda:0"``, ``"cpu"``. Used to decide whether to
+        ``torch.cuda.synchronize()`` around timed blocks and which CUDA
+        devices contribute memory statistics.
+    patch_size : int, optional
+        Image-token grid factor used by :meth:`report` to translate image
+        sizes into token counts and throughput. Defaults to
+        :data:`DEFAULT_IMAGE_PATCH_SIZE`.
+    config : Mapping[str, object], optional
+        Run metadata forwarded to :meth:`set_config` and shown by
+        :meth:`report`.
+    """
+    def __init__(
+        self,
+        enabled: bool,
+        device: str = "cuda",
+        patch_size: int = DEFAULT_IMAGE_PATCH_SIZE,
+        config: Mapping[str, object] | None = None,
+    ) -> None:
+        self.enabled = enabled
+        self.device = device
+        self.patch_size = patch_size
+        self.load_time: float = 0.0
+        self.load_memory_peak = _MemoryPeak()
+        self.gen_records: List[_GenerationRecord] = []
+        self.config: dict[str, str] = {}
+        if config:
+            self.set_config(config)
+    def set_config(self, config: Mapping[str, object]) -> None:
+        """Attach run metadata (e.g. vram_mode, attn_backend, dtype) shown in report().
+        ``None`` values are dropped so callers can pass-through optional args
+        without filtering. Existing keys are overwritten. Values are stored as
+        ``str(value)``.
+        """
+        for key, value in config.items():
+            if value is None:
+                continue
+            self.config[key] = str(value)
+    # ------------------------------------------------------------------
+    # timing
+    # ------------------------------------------------------------------
+    def _sync(self) -> None:
+        """Wait for pending CUDA work so wall-clock timings include it.
+        An explicitly indexed device (``"cuda:1"``) is synchronized directly;
+        a bare ``"cuda"`` synchronizes the current device. No-op when the
+        profiler is disabled or CUDA is not in use.
+        """
+        if not self._has_cuda_memory_stats():
+            return
+        device = torch.device(self.device)
+        # Without an explicit target, torch.cuda.synchronize() only waits for
+        # the *current* device, which need not be the one named in self.device.
+        torch.cuda.synchronize(device if device.index is not None else None)
+    def _has_cuda_memory_stats(self) -> bool:
+        """Return True when profiling is enabled on an available CUDA device."""
+        return self.enabled and self.device.startswith("cuda") and torch.cuda.is_available()
+    def _cuda_devices(self) -> list[torch.device]:
+        """Return the CUDA devices to track.
+        An indexed device yields just that device; a bare ``"cuda"`` yields
+        every device counted by ``torch.cuda.device_count()``; anything else
+        yields an empty list.
+        """
+        device = torch.device(self.device)
+        if device.type != "cuda":
+            return []
+        if device.index is not None:
+            return [device]
+        return [torch.device(f"cuda:{idx}") for idx in range(torch.cuda.device_count())]
+    def _reset_memory_peak(self) -> None:
+        """Reset the CUDA peak-memory counters of every tracked device."""
+        if self._has_cuda_memory_stats():
+            for device in self._cuda_devices():
+                torch.cuda.reset_peak_memory_stats(device)
+    def _memory_peak(self) -> _MemoryPeak:
+        """Snapshot the current peak figures (CPU RSS plus per-device CUDA peaks)."""
+        cpu_rss = _process_rss_peak()
+        if not self._has_cuda_memory_stats():
+            return _MemoryPeak(cpu_rss=cpu_rss)
+        by_device = tuple(
+            (
+                str(device),
+                torch.cuda.max_memory_allocated(device),
+                torch.cuda.max_memory_reserved(device),
+            )
+            for device in self._cuda_devices()
+        )
+        return _MemoryPeak(
+            allocated=sum(allocated for _, allocated, _ in by_device),
+            reserved=sum(reserved for _, _, reserved in by_device),
+            cpu_rss=cpu_rss,
+            by_device=by_device,
+        )
+    @contextmanager
+    def time_load(self) -> Iterator[None]:
+        """Time the model-loading block and capture its memory peak.
+        The results are stored in ``load_time`` and ``load_memory_peak`` even
+        when the block raises. Does nothing when the profiler is disabled.
+        """
+        if not self.enabled:
+            yield
+            return
+        self._sync()
+        self._reset_memory_peak()
+        t0 = time.perf_counter()
+        try:
+            yield
+        finally:
+            self._sync()
+            self.load_time = time.perf_counter() - t0
+            self.load_memory_peak = self._memory_peak()
+    @contextmanager
+    def time_generate(self, width: int, height: int, batch: int = 1) -> Iterator[GenerationHandle]:
+        """Time a generation block. Yields a mutable handle so callers can
+        correct ``batch`` (or width/height) after the call when the true
+        count is only known post-hoc (e.g. interleave produces N images per
+        call). Existing callers that ignore the yielded value still work.
+        The handle is read when the block exits, and a record is appended to
+        ``gen_records`` even when the block raises. When the profiler is
+        disabled the handle is still yielded but nothing is recorded."""
+        handle = GenerationHandle(width=width, height=height, batch=batch)
+        if not self.enabled:
+            yield handle
+            return
+        self._sync()
+        self._reset_memory_peak()
+        t0 = time.perf_counter()
+        try:
+            yield handle
+        finally:
+            self._sync()
+            self.gen_records.append(
+                _GenerationRecord(
+                    width=handle.width,
+                    height=handle.height,
+                    batch=handle.batch,
+                    seconds=time.perf_counter() - t0,
+                    memory_peak=self._memory_peak(),
+                )
+            )
+    def update_last_batch(self, n: int) -> None:
+        """Correct the batch count of the most recent time_generate record.
+        Call this immediately after the context manager exits, once the actual
+        number of generated images is known (e.g. len(images) for interleaved
+        generation where one call produces a variable number of images).
+        Does nothing when the profiler is disabled or nothing was recorded.
+        """
+        if self.enabled and self.gen_records:
+            self.gen_records[-1].batch = n
+    # ------------------------------------------------------------------
+    # reporting
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _ratio(numerator: float, denominator: float) -> float:
+        """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
+        return numerator / denominator if denominator else 0.0
+    def _record_tokens(self, record: _GenerationRecord) -> int:
+        """Return the image-token count of one record (grid cells times batch)."""
+        return (record.width // self.patch_size) * (record.height // self.patch_size) * record.batch
+    def report(self) -> None:
+        """Print a summary. No-op when ``enabled=False``."""
+        if not self.enabled:
+            return
+        rule = "=" * 64
+        print()
+        print(rule)
+        print("Profile summary")
+        print(rule)
+        if self.config:
+            config_str = ", ".join(f"{k}={v}" for k, v in self.config.items())
+            print(f"  config              : {config_str}")
+        print(f"  model load          : {self.load_time:8.3f} s")
+        if self.load_memory_peak.available:
+            print(f"  load peak memory    : {self._format_memory(self.load_memory_peak)}")
+        if not self.gen_records:
+            print("  (no generations were timed)")
+            # Close the summary box on this path too.
+            print(rule)
+            return
+        total_images = sum(record.batch for record in self.gen_records)
+        total_time = sum(record.seconds for record in self.gen_records)
+        total_tokens = sum(self._record_tokens(record) for record in self.gen_records)
+        # A run can legitimately end with zero images (a batch count may be
+        # corrected to 0 after the fact), so every rate goes through _ratio
+        # instead of dividing directly and crashing the report.
+        avg_per_image = self._ratio(total_time, total_images)
+        avg_tokens = self._ratio(total_tokens, total_images)
+        tokens_per_sec = self._ratio(total_tokens, total_time)
+        peak_generation_memory = self._max_memory_peak(record.memory_peak for record in self.gen_records)
+        print(
+            f"  generations         : {len(self.gen_records)} call(s), "
+            f"{total_images} image(s) total, {total_time:.3f} s wall"
+        )
+        print(f"  avg per image       : {avg_per_image:8.3f} s")
+        print(
+            f"  image tokens        : patch_size={self.patch_size}, "
+            f"avg {avg_tokens:.0f} tok/image ({int(avg_tokens):d})"
+        )
+        print(f"  throughput          : {tokens_per_sec:8.2f} tok/s")
+        if peak_generation_memory.available:
+            print(f"  generation peak mem : {self._format_memory(peak_generation_memory)}")
+        if len(self.gen_records) > 1:
+            print("  per-call breakdown  :")
+            for idx, record in enumerate(self.gen_records, start=1):
+                tokens = self._record_tokens(record)
+                memory = f", {self._format_memory(record.memory_peak)}" if record.memory_peak.available else ""
+                print(
+                    f"    [{idx:>3}] {record.width}x{record.height} x{record.batch}  "
+                    f"{record.seconds:7.3f} s  ({tokens:>6d} tok, "
+                    f"{self._ratio(tokens, record.seconds):8.2f} tok/s{memory})"
+                )
+        print(rule)
+    @staticmethod
+    def _format_bytes(num_bytes: int) -> str:
+        """Format a byte count as GiB with two decimals."""
+        return f"{num_bytes / (1024**3):.2f} GiB"
+    @classmethod
+    def _format_memory(cls, memory_peak: _MemoryPeak) -> str:
+        """Render a memory peak as text, adding a per-device breakdown when
+        more than one device was tracked. Returns ``"n/a"`` when no figure is
+        positive."""
+        parts: list[str] = []
+        if memory_peak.allocated > 0 or memory_peak.reserved > 0:
+            parts.append(
+                f"allocated {cls._format_bytes(memory_peak.allocated)}, "
+                f"reserved {cls._format_bytes(memory_peak.reserved)}"
+            )
+        if memory_peak.cpu_rss > 0:
+            parts.append(f"cpu RSS {cls._format_bytes(memory_peak.cpu_rss)}")
+        text = ", ".join(parts) if parts else "n/a"
+        if len(memory_peak.by_device) <= 1:
+            return text
+        details = ", ".join(
+            f"{name}: {cls._format_bytes(allocated)} alloc/{cls._format_bytes(reserved)} reserved"
+            for name, allocated, reserved in memory_peak.by_device
+        )
+        return f"{text} ({details})"
+    @staticmethod
+    def _max_memory_peak(memory_peaks: Iterator[_MemoryPeak]) -> _MemoryPeak:
+        """Return the field-wise maximum of several peaks.
+        Aggregate figures and per-device figures are maximized independently,
+        so the result need not equal any single input snapshot.
+        """
+        max_peak = _MemoryPeak()
+        by_device: dict[str, tuple[int, int]] = {}
+        for memory_peak in memory_peaks:
+            max_peak.allocated = max(max_peak.allocated, memory_peak.allocated)
+            max_peak.reserved = max(max_peak.reserved, memory_peak.reserved)
+            max_peak.cpu_rss = max(max_peak.cpu_rss, memory_peak.cpu_rss)
+            for name, allocated, reserved in memory_peak.by_device:
+                prev_allocated, prev_reserved = by_device.get(name, (0, 0))
+                by_device[name] = (max(prev_allocated, allocated), max(prev_reserved, reserved))
+        max_peak.by_device = tuple((name, allocated, reserved) for name, (allocated, reserved) in by_device.items())
+        return max_peak
--- a/SenseNova-U1/uv.lock
+++ b/SenseNova-U1/uv.lock
+version = 1
+revision = 2
+requires-python = "==3.11.*"
+[[package]]
+name = "accelerate"
+version = "1.10.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "huggingface-hub" },
+    { name = "numpy" },
+    { name = "packaging" },
+    { name = "psutil" },
+    { name = "pyyaml" },
+    { name = "safetensors" },
+    { name = "torch" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/72/ff3961c19ee395c3d30ac630ee77bfb0e1b46b87edc504d4f83bb4a89705/accelerate-1.10.1.tar.gz", hash = "sha256:3dea89e433420e4bfac0369cae7e36dcd6a56adfcfd38cdda145c6225eab5df8", size = 392446, upload-time = "2025-08-25T13:57:06.21Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5f/a0/d9ef19f780f319c21ee90ecfef4431cbeeca95bec7f14071785c17b6029b/accelerate-1.10.1-py3-none-any.whl", hash = "sha256:3621cff60b9a27ce798857ece05e2b9f56fcc71631cfb31ccf71f0359c311f11", size = 374909, upload-time = "2025-08-25T13:57:04.55Z" },
+]
+[[package]]
+name = "anyio"
+version = "4.13.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "idna" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
+]
+[[package]]
+name = "certifi"
+version = "2026.2.25"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
+]
+[[package]]
+name = "cfgv"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
+]
+[[package]]
+name = "charset-normalizer"
+version = "3.4.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/d7/b5b7020a0565c2e9fa8c09f4b5fa6232feb326b8c20081ccded47ea368fd/charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7", size = 309705, upload-time = "2026-04-02T09:26:02.191Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/53/58c29116c340e5456724ecd2fff4196d236b98f3da97b404bc5e51ac3493/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7", size = 206419, upload-time = "2026-04-02T09:26:03.583Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/02/e8146dc6591a37a00e5144c63f29fb7c97a734ea8a111190783c0e60ab63/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e", size = 227901, upload-time = "2026-04-02T09:26:04.738Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/73/77486c4cd58f1267bf17db420e930c9afa1b3be3fe8c8b8ebbebc9624359/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c", size = 222742, upload-time = "2026-04-02T09:26:06.36Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/fa/f74eb381a7d94ded44739e9d94de18dc5edc9c17fb8c11f0a6890696c0a9/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df", size = 214061, upload-time = "2026-04-02T09:26:08.347Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/92/42bd3cefcf7687253fb86694b45f37b733c97f59af3724f356fa92b8c344/charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265", size = 199239, upload-time = "2026-04-02T09:26:09.823Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/3d/069e7184e2aa3b3cddc700e3dd267413dc259854adc3380421c805c6a17d/charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4", size = 210173, upload-time = "2026-04-02T09:26:10.953Z" },
+    { url = "https://files.pythonhosted.org/packages/62/51/9d56feb5f2e7074c46f93e0ebdbe61f0848ee246e2f0d89f8e20b89ebb8f/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e", size = 209841, upload-time = "2026-04-02T09:26:12.142Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/59/893d8f99cc4c837dda1fe2f1139079703deb9f321aabcb032355de13b6c7/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38", size = 200304, upload-time = "2026-04-02T09:26:13.711Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/1d/ee6f3be3464247578d1ed5c46de545ccc3d3ff933695395c402c21fa6b77/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c", size = 229455, upload-time = "2026-04-02T09:26:14.941Z" },
+    { url = "https://files.pythonhosted.org/packages/54/bb/8fb0a946296ea96a488928bdce8ef99023998c48e4713af533e9bb98ef07/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b", size = 210036, upload-time = "2026-04-02T09:26:16.478Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/bc/015b2387f913749f82afd4fcba07846d05b6d784dd16123cb66860e0237d/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c", size = 224739, upload-time = "2026-04-02T09:26:17.751Z" },
+    { url = "https://files.pythonhosted.org/packages/17/ab/63133691f56baae417493cba6b7c641571a2130eb7bceba6773367ab9ec5/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d", size = 216277, upload-time = "2026-04-02T09:26:18.981Z" },
+    { url = "https://files.pythonhosted.org/packages/06/6d/3be70e827977f20db77c12a97e6a9f973631a45b8d186c084527e53e77a4/charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad", size = 147819, upload-time = "2026-04-02T09:26:20.295Z" },
+    { url = "https://files.pythonhosted.org/packages/20/d9/5f67790f06b735d7c7637171bbfd89882ad67201891b7275e51116ed8207/charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00", size = 159281, upload-time = "2026-04-02T09:26:21.74Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/83/6413f36c5a34afead88ce6f66684d943d91f233d76dd083798f9602b75ae/charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1", size = 147843, upload-time = "2026-04-02T09:26:22.901Z" },
+    { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" },
+]
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+]
+[[package]]
+name = "distlib"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
+]
+[[package]]
+name = "einops"
+version = "0.8.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2c/77/850bef8d72ffb9219f0b1aac23fbc1bf7d038ee6ea666f331fa273031aa2/einops-0.8.2.tar.gz", hash = "sha256:609da665570e5e265e27283aab09e7f279ade90c4f01bcfca111f3d3e13f2827", size = 56261, upload-time = "2026-01-26T04:13:17.638Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/09/f8d8f8f31e4483c10a906437b4ce31bdf3d6d417b73fe33f1a8b59e34228/einops-0.8.2-py3-none-any.whl", hash = "sha256:54058201ac7087911181bfec4af6091bb59380360f069276601256a76af08193", size = 65638, upload-time = "2026-01-26T04:13:18.546Z" },
+]
+[[package]]
+name = "filelock"
+version = "3.28.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/17/6e8890271880903e3538660a21d63a6c1fea969ac71d0d6b608b78727fa9/filelock-3.28.0.tar.gz", hash = "sha256:4ed1010aae813c4ee8d9c660e4792475ee60c4a0ba76073ceaf862bd317e3ca6", size = 56474, upload-time = "2026-04-14T22:54:33.625Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/21/2f728888c45033d34a417bfcd248ea2564c9e08ab1bfd301377cf05d5586/filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db", size = 39189, upload-time = "2026-04-14T22:54:32.037Z" },
+]
+[[package]]
+name = "flash-attn"
+version = "2.8.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "einops" },
+    { name = "torch" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3b/b2/8d76c41ad7974ee264754709c22963447f7f8134613fd9ce80984ed0dab7/flash_attn-2.8.3.tar.gz", hash = "sha256:1e71dd64a9e0280e0447b8a0c2541bad4bf6ac65bdeaa2f90e51a9e57de0370d", size = 8447812, upload-time = "2025-08-15T08:28:12.911Z" }
+[[package]]
+name = "fsspec"
+version = "2026.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = "sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" },
+]
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
+]
+[[package]]
+name = "hf-xet"
+version = "1.4.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" },
+    { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" },
+    { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" },
+    { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" },
+]
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
+]
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "certifi" },
+    { name = "httpcore" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
+]
+[[package]]
+name = "huggingface-hub"
+version = "0.36.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "fsspec" },
+    { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+    { name = "packaging" },
+    { name = "pyyaml" },
+    { name = "requests" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
+]
+[[package]]
+name = "identify"
+version = "2.6.19"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/63/51723b5f116cc04b061cb6f5a561790abf249d25931d515cd375e063e0f4/identify-2.6.19.tar.gz", hash = "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842", size = 99567, upload-time = "2026-04-17T18:39:50.265Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/84/d9273cd09688070a6523c4aee4663a8538721b2b755c4962aafae0011e72/identify-2.6.19-py2.py3-none-any.whl", hash = "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a", size = 99397, upload-time = "2026-04-17T18:39:49.221Z" },
+]
+[[package]]
+name = "idna"
+version = "3.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
+]
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+]
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
+]
+[[package]]
+name = "markupsafe"
+version = "3.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" },
+    { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940, upload-time = "2025-09-27T18:36:22.249Z" },
+    { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887, upload-time = "2025-09-27T18:36:23.535Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692, upload-time = "2025-09-27T18:36:24.823Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471, upload-time = "2025-09-27T18:36:25.95Z" },
+    { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923, upload-time = "2025-09-27T18:36:27.109Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572, upload-time = "2025-09-27T18:36:28.045Z" },
+    { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077, upload-time = "2025-09-27T18:36:29.025Z" },
+    { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876, upload-time = "2025-09-27T18:36:29.954Z" },
+]
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
+]
+[[package]]
+name = "networkx"
+version = "3.6.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" },
+]
+[[package]]
+name = "nodeenv"
+version = "1.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
+]
+[[package]]
+name = "numpy"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/47/1b/1d565e0f6e156e1522ab564176b8b29d71e13d8caf003a08768df3d5cec5/numpy-2.2.0.tar.gz", hash = "sha256:140dd80ff8981a583a60980be1a655068f8adebf7a45a06a6858c873fcdcd4a0", size = 20225497, upload-time = "2024-12-08T15:45:53.828Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/80/1b/736023977a96e787c4e7653a1ac2d31d4f6ab6b4048f83c8359f7c0af2e3/numpy-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9874bc2ff574c40ab7a5cbb7464bf9b045d617e36754a7bc93f933d52bd9ffc6", size = 21216607, upload-time = "2024-12-08T15:24:13.737Z" },
+    { url = "https://files.pythonhosted.org/packages/85/4f/5f0be4c5c93525e663573bab9e29bd88a71f85de3a0d01413ee05bce0c2f/numpy-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0da8495970f6b101ddd0c38ace92edea30e7e12b9a926b57f5fabb1ecc25bb90", size = 14387756, upload-time = "2024-12-08T15:24:35.67Z" },
+    { url = "https://files.pythonhosted.org/packages/36/78/c38af7833c4f29999cdacdf12452b43b660cd25a1990ea9a7edf1fb01f17/numpy-2.2.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0557eebc699c1c34cccdd8c3778c9294e8196df27d713706895edc6f57d29608", size = 5388483, upload-time = "2024-12-08T15:24:45.64Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/b5/306ac6ee3f8f0c51abd3664ee8a9b8e264cbf179a860674827151ecc0a9c/numpy-2.2.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:3579eaeb5e07f3ded59298ce22b65f877a86ba8e9fe701f5576c99bb17c283da", size = 6929721, upload-time = "2024-12-08T15:24:57.76Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/15/e33a7d86d8ce91de82c34ce94a87f2b8df891e603675e83ec7039325ff10/numpy-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40deb10198bbaa531509aad0cd2f9fadb26c8b94070831e2208e7df543562b74", size = 14334667, upload-time = "2024-12-08T15:25:20.087Z" },
+    { url = "https://files.pythonhosted.org/packages/52/33/10825f580f42a353f744abc450dcd2a4b1e6f1931abb0ccbd1d63bd3993c/numpy-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2aed8fcf8abc3020d6a9ccb31dbc9e7d7819c56a348cc88fd44be269b37427e", size = 16390204, upload-time = "2024-12-08T15:25:45.414Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/24/36cce77559572bdc6c8bcdd2f3e0db03c7079d14b9a1cd342476d7f451e8/numpy-2.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a222d764352c773aa5ebde02dd84dba3279c81c6db2e482d62a3fa54e5ece69b", size = 15556123, upload-time = "2024-12-08T15:26:09.247Z" },
+    { url = "https://files.pythonhosted.org/packages/05/51/2d706d14adee8f5c70c5de3831673d4d57051fc9ac6f3f6bff8811d2f9bd/numpy-2.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4e58666988605e251d42c2818c7d3d8991555381be26399303053b58a5bbf30d", size = 18179898, upload-time = "2024-12-08T15:26:37.996Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/e7/ea8b7652564113f218e75b296e3545a256d88b233021f792fd08591e8f33/numpy-2.2.0-cp311-cp311-win32.whl", hash = "sha256:4723a50e1523e1de4fccd1b9a6dcea750c2102461e9a02b2ac55ffeae09a4410", size = 6568146, upload-time = "2024-12-08T15:26:50.015Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/06/3d1ff6ed377cb0340baf90487a35f15f9dc1db8e0a07de2bf2c54a8e490f/numpy-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:16757cf28621e43e252c560d25b15f18a2f11da94fea344bf26c599b9cf54b73", size = 12916677, upload-time = "2024-12-08T15:28:41.652Z" },
+]
+[[package]]
+name = "nvidia-cublas-cu12"
+version = "12.8.4.1"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" },
+]
+[[package]]
+name = "nvidia-cuda-cupti-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" },
+]
+[[package]]
+name = "nvidia-cuda-nvrtc-cu12"
+version = "12.8.93"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" },
+]
+[[package]]
+name = "nvidia-cuda-runtime-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" },
+]
+[[package]]
+name = "nvidia-cudnn-cu12"
+version = "9.10.2.21"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-cublas-cu12" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
+]
+[[package]]
+name = "nvidia-cufft-cu12"
+version = "11.3.3.83"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-nvjitlink-cu12" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
+]
+[[package]]
+name = "nvidia-cufile-cu12"
+version = "1.13.1.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" },
+]
+[[package]]
+name = "nvidia-curand-cu12"
+version = "10.3.9.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" },
+]
+[[package]]
+name = "nvidia-cusolver-cu12"
+version = "11.7.3.90"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-cublas-cu12" },
+    { name = "nvidia-cusparse-cu12" },
+    { name = "nvidia-nvjitlink-cu12" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
+]
+[[package]]
+name = "nvidia-cusparse-cu12"
+version = "12.5.8.93"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nvidia-nvjitlink-cu12" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
+]
+[[package]]
+name = "nvidia-cusparselt-cu12"
+version = "0.7.1"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" },
+]
+[[package]]
+name = "nvidia-nccl-cu12"
+version = "2.27.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" },
+]
+[[package]]
+name = "nvidia-nvjitlink-cu12"
+version = "12.8.93"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
+]
+[[package]]
+name = "nvidia-nvtx-cu12"
+version = "12.8.90"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
+]
+[[package]]
+name = "packaging"
+version = "25.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
+]
+[[package]]
+name = "pillow"
+version = "12.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" },
+    { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" },
+    { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" },
+    { url = "https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" },
+    { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" },
+    { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" },
+    { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" },
+]
+[[package]]
+name = "platformdirs"
+version = "4.9.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9f/4a/0883b8e3802965322523f0b200ecf33d31f10991d0401162f4b23c698b42/platformdirs-4.9.6.tar.gz", hash = "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", size = 29400, upload-time = "2026-04-09T00:04:10.812Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348, upload-time = "2026-04-09T00:04:09.463Z" },
+]
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+]
+[[package]]
+name = "pre-commit"
+version = "4.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cfgv" },
+    { name = "identify" },
+    { name = "nodeenv" },
+    { name = "pyyaml" },
+    { name = "virtualenv" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" },
+]
+[[package]]
+name = "psutil"
+version = "7.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
+    { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
+    { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
+    { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
+]
+[[package]]
+name = "pygments"
+version = "2.20.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
+]
+[[package]]
+name = "pytest"
+version = "9.0.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
+]
+[[package]]
+name = "python-discovery"
+version = "1.2.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "platformdirs" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/de/ef/3bae0e537cfe91e8431efcba4434463d2c5a65f5a89edd47c6cf2f03c55f/python_discovery-1.2.2.tar.gz", hash = "sha256:876e9c57139eb757cb5878cbdd9ae5379e5d96266c99ef731119e04fffe533bb", size = 58872, upload-time = "2026-04-07T17:28:49.249Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d8/db/795879cc3ddfe338599bddea6388cc5100b088db0a4caf6e6c1af1c27e04/python_discovery-1.2.2-py3-none-any.whl", hash = "sha256:e1ae95d9af875e78f15e19aed0c6137ab1bb49c200f21f5061786490c9585c7a", size = 31894, upload-time = "2026-04-07T17:28:48.09Z" },
+]
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" },
+    { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" },
+    { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" },
+    { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" },
+    { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" },
+    { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" },
+]
+[[package]]
+name = "regex"
+version = "2026.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/3a246dbf05666918bd3664d9d787f84a9108f6f43cc953a077e4a7dfdb7e/regex-2026.4.4.tar.gz", hash = "sha256:e08270659717f6973523ce3afbafa53515c4dc5dcad637dc215b6fd50f689423", size = 416000, upload-time = "2026-04-03T20:56:28.155Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e0/7a/617356cbecdb452812a5d42f720d6d5096b360d4a4c1073af700ea140ad2/regex-2026.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4c36a85b00fadb85db9d9e90144af0a980e1a3d2ef9cd0f8a5bef88054657c6", size = 489415, upload-time = "2026-04-03T20:53:11.645Z" },
+    { url = "https://files.pythonhosted.org/packages/20/e6/bf057227144d02e3ba758b66649e87531d744dda5f3254f48660f18ae9d8/regex-2026.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dcb5453ecf9cd58b562967badd1edbf092b0588a3af9e32ee3d05c985077ce87", size = 291205, upload-time = "2026-04-03T20:53:13.289Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/3b/637181b787dd1a820ba1c712cee2b4144cd84a32dc776ca067b12b2d70c8/regex-2026.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6aa809ed4dc3706cc38594d67e641601bd2f36d5555b2780ff074edfcb136cf8", size = 289225, upload-time = "2026-04-03T20:53:16.002Z" },
+    { url = "https://files.pythonhosted.org/packages/05/21/bac05d806ed02cd4b39d9c8e5b5f9a2998c94c3a351b7792e80671fa5315/regex-2026.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33424f5188a7db12958246a54f59a435b6cb62c5cf9c8d71f7cc49475a5fdada", size = 792434, upload-time = "2026-04-03T20:53:17.414Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/17/c65d1d8ae90b772d5758eb4014e1e011bb2db353fc4455432e6cc9100df7/regex-2026.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d346fccdde28abba117cc9edc696b9518c3307fbfcb689e549d9b5979018c6d", size = 861730, upload-time = "2026-04-03T20:53:18.903Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/64/933321aa082a2c6ee2785f22776143ba89840189c20d3b6b1d12b6aae16b/regex-2026.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:415a994b536440f5011aa77e50a4274d15da3245e876e5c7f19da349caaedd87", size = 906495, upload-time = "2026-04-03T20:53:20.561Z" },
+    { url = "https://files.pythonhosted.org/packages/01/ea/4c8d306e9c36ac22417336b1e02e7b358152c34dc379673f2d331143725f/regex-2026.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21e5eb86179b4c67b5759d452ea7c48eb135cd93308e7a260aa489ed2eb423a4", size = 799810, upload-time = "2026-04-03T20:53:22.961Z" },
+    { url = "https://files.pythonhosted.org/packages/29/ce/7605048f00e1379eba89d610c7d644d8f695dc9b26d3b6ecfa3132b872ff/regex-2026.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:312ec9dd1ae7d96abd8c5a36a552b2139931914407d26fba723f9e53c8186f86", size = 774242, upload-time = "2026-04-03T20:53:25.015Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/77/283e0d5023fde22cd9e86190d6d9beb21590a452b195ffe00274de470691/regex-2026.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a0d2b28aa1354c7cd7f71b7658c4326f7facac106edd7f40eda984424229fd59", size = 781257, upload-time = "2026-04-03T20:53:26.918Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/fb/7f3b772be101373c8626ed34c5d727dcbb8abd42a7b1219bc25fd9a3cc04/regex-2026.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:349d7310eddff40429a099c08d995c6d4a4bfaf3ff40bd3b5e5cb5a5a3c7d453", size = 854490, upload-time = "2026-04-03T20:53:29.065Z" },
+    { url = "https://files.pythonhosted.org/packages/85/30/56547b80f34f4dd2986e1cdd63b1712932f63b6c4ce2f79c50a6cd79d1c2/regex-2026.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:e7ab63e9fe45a9ec3417509e18116b367e89c9ceb6219222a3396fa30b147f80", size = 763544, upload-time = "2026-04-03T20:53:30.917Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/2f/ce060fdfea8eff34a8997603532e44cdb7d1f35e3bc253612a8707a90538/regex-2026.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fe896e07a5a2462308297e515c0054e9ec2dd18dfdc9427b19900b37dfe6f40b", size = 844442, upload-time = "2026-04-03T20:53:32.463Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/44/810cb113096a1dacbe82789fbfab2823f79d19b7f1271acecb7009ba9b88/regex-2026.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:eb59c65069498dbae3c0ef07bbe224e1eaa079825a437fb47a479f0af11f774f", size = 789162, upload-time = "2026-04-03T20:53:34.039Z" },
+    { url = "https://files.pythonhosted.org/packages/20/96/9647dd7f2ecf6d9ce1fb04dfdb66910d094e10d8fe53e9c15096d8aa0bd2/regex-2026.4.4-cp311-cp311-win32.whl", hash = "sha256:2a5d273181b560ef8397c8825f2b9d57013de744da9e8257b8467e5da8599351", size = 266227, upload-time = "2026-04-03T20:53:35.601Z" },
+    { url = "https://files.pythonhosted.org/packages/33/80/74e13262460530c3097ff343a17de9a34d040a5dc4de9cf3a8241faab51c/regex-2026.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:9542ccc1e689e752594309444081582f7be2fdb2df75acafea8a075108566735", size = 278399, upload-time = "2026-04-03T20:53:37.021Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/3c/39f19f47f19dcefa3403f09d13562ca1c0fd07ab54db2bc03148f3f6b46a/regex-2026.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:b5f9fb784824a042be3455b53d0b112655686fdb7a91f88f095f3fee1e2a2a54", size = 270473, upload-time = "2026-04-03T20:53:38.633Z" },
+]
+[[package]]
+name = "requests"
+version = "2.33.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5f/a4/98b9c7c6428a668bf7e42ebb7c79d576a1c3c1e3ae2d47e674b468388871/requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517", size = 134120, upload-time = "2026-03-30T16:09:15.531Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, upload-time = "2026-03-30T16:09:13.83Z" },
+]
+[[package]]
+name = "ruff"
+version = "0.15.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/8d/192f3d7103816158dfd5ea50d098ef2aec19194e6cbccd4b3485bdb2eb2d/ruff-0.15.11.tar.gz", hash = "sha256:f092b21708bf0e7437ce9ada249dfe688ff9a0954fc94abab05dcea7dcd29c33", size = 4637264, upload-time = "2026-04-16T18:46:26.58Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/02/1e/6aca3427f751295ab011828e15e9bf452200ac74484f1db4be0197b8170b/ruff-0.15.11-py3-none-linux_armv6l.whl", hash = "sha256:e927cfff503135c558eb581a0c9792264aae9507904eb27809cdcff2f2c847b7", size = 10607943, upload-time = "2026-04-16T18:46:05.967Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/26/1341c262e74f36d4e84f3d6f4df0ac68cd53331a66bfc5080daa17c84c0b/ruff-0.15.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7a1b5b2938d8f890b76084d4fa843604d787a912541eae85fd7e233398bbb73e", size = 10988592, upload-time = "2026-04-16T18:46:00.742Z" },
+    { url = "https://files.pythonhosted.org/packages/03/71/850b1d6ffa9564fbb6740429bad53df1094082fe515c8c1e74b6d8d05f18/ruff-0.15.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d4176f3d194afbdaee6e41b9ccb1a2c287dba8700047df474abfbe773825d1cb", size = 10338501, upload-time = "2026-04-16T18:46:03.723Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/11/cc1284d3e298c45a817a6aadb6c3e1d70b45c9b36d8d9cce3387b495a03a/ruff-0.15.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b17c886fb88203ced3afe7f14e8d5ae96e9d2f4ccc0ee66aa19f2c2675a27e4", size = 10670693, upload-time = "2026-04-16T18:46:41.941Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/9e/f8288b034ab72b371513c13f9a41d9ba3effac54e24bfb467b007daee2ca/ruff-0.15.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49fafa220220afe7758a487b048de4c8f9f767f37dfefad46b9dd06759d003eb", size = 10416177, upload-time = "2026-04-16T18:46:21.717Z" },
+    { url = "https://files.pythonhosted.org/packages/85/71/504d79abfd3d92532ba6bbe3d1c19fada03e494332a59e37c7c2dabae427/ruff-0.15.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2ab8427e74a00d93b8bda1307b1e60970d40f304af38bccb218e056c220120d", size = 11221886, upload-time = "2026-04-16T18:46:15.086Z" },
+    { url = "https://files.pythonhosted.org/packages/43/5a/947e6ab7a5ad603d65b474be15a4cbc6d29832db5d762cd142e4e3a74164/ruff-0.15.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:195072c0c8e1fc8f940652073df082e37a5d9cb43b4ab1e4d0566ab8977a13b7", size = 12075183, upload-time = "2026-04-16T18:46:07.944Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/a1/0b7bb6268775fdd3a0818aee8efd8f5b4e231d24dd4d528ced2534023182/ruff-0.15.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a0996d486af3920dec930a2e7daed4847dfc12649b537a9335585ada163e9e", size = 11516575, upload-time = "2026-04-16T18:46:31.687Z" },
+    { url = "https://files.pythonhosted.org/packages/30/c3/bb5168fc4d233cc06e95f482770d0f3c87945a0cd9f614b90ea8dc2f2833/ruff-0.15.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bef2cb556d509259f1fe440bb9cd33c756222cf0a7afe90d15edf0866702431", size = 11306537, upload-time = "2026-04-16T18:46:36.988Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/92/4cfae6441f3967317946f3b788136eecf093729b94d6561f963ed810c82e/ruff-0.15.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:030d921a836d7d4a12cf6e8d984a88b66094ccb0e0f17ddd55067c331191bf19", size = 11296813, upload-time = "2026-04-16T18:46:24.182Z" },
+    { url = "https://files.pythonhosted.org/packages/43/26/972784c5dde8313acde8ac71ba8ac65475b85db4a2352a76c9934361f9bc/ruff-0.15.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e783b599b4577788dbbb66b9addcef87e9a8832f4ce0c19e34bf55543a2f890", size = 10633136, upload-time = "2026-04-16T18:46:39.802Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/53/3985a4f185020c2f367f2e08a103032e12564829742a1b417980ce1514a0/ruff-0.15.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ae90592246625ba4a34349d68ec28d4400d75182b71baa196ddb9f82db025ef5", size = 10424701, upload-time = "2026-04-16T18:46:10.381Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/57/bf0dfb32241b56c83bb663a826133da4bf17f682ba8c096973065f6e6a68/ruff-0.15.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1f111d62e3c983ed20e0ca2e800f8d77433a5b1161947df99a5c2a3fb60514f0", size = 10873887, upload-time = "2026-04-16T18:46:29.157Z" },
+    { url = "https://files.pythonhosted.org/packages/02/05/e48076b2a57dc33ee8c7a957296f97c744ca891a8ffb4ffb1aaa3b3f517d/ruff-0.15.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:06f483d6646f59eaffba9ae30956370d3a886625f511a3108994000480621d1c", size = 11404316, upload-time = "2026-04-16T18:46:19.462Z" },
+    { url = "https://files.pythonhosted.org/packages/88/27/0195d15fe7a897cbcba0904792c4b7c9fdd958456c3a17d2ea6093716a9a/ruff-0.15.11-py3-none-win32.whl", hash = "sha256:476a2aa56b7da0b73a3ee80b6b2f0e19cce544245479adde7baa65466664d5f3", size = 10655535, upload-time = "2026-04-16T18:46:12.47Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/5e/c927b325bd4c1d3620211a4b96f47864633199feed60fa936025ab27e090/ruff-0.15.11-py3-none-win_amd64.whl", hash = "sha256:8b6756d88d7e234fb0c98c91511aae3cd519d5e3ed271cae31b20f39cb2a12a3", size = 11779692, upload-time = "2026-04-16T18:46:17.268Z" },
+    { url = "https://files.pythonhosted.org/packages/63/b6/aeadee5443e49baa2facd51131159fd6301cc4ccfc1541e4df7b021c37dd/ruff-0.15.11-py3-none-win_arm64.whl", hash = "sha256:063fed18cc1bbe0ee7393957284a6fe8b588c6a406a285af3ee3f46da2391ee4", size = 11032614, upload-time = "2026-04-16T18:46:34.487Z" },
+]
+[[package]]
+name = "safetensors"
+version = "0.6.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ac/cc/738f3011628920e027a11754d9cae9abec1aed00f7ae860abbf843755233/safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9", size = 197968, upload-time = "2025-08-08T13:13:58.654Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4d/b1/3f5fd73c039fc87dba3ff8b5d528bfc5a32b597fea8e7a6a4800343a17c7/safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba", size = 454797, upload-time = "2025-08-08T13:13:52.066Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/c9/bb114c158540ee17907ec470d01980957fdaf87b4aa07914c24eba87b9c6/safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b", size = 432206, upload-time = "2025-08-08T13:13:50.931Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/8e/f70c34e47df3110e8e0bb268d90db8d4be8958a54ab0336c9be4fe86dac8/safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd", size = 473261, upload-time = "2025-08-08T13:13:41.259Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/f5/be9c6a7c7ef773e1996dc214e73485286df1836dbd063e8085ee1976f9cb/safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a", size = 485117, upload-time = "2025-08-08T13:13:43.506Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/55/23f2d0a2c96ed8665bf17a30ab4ce5270413f4d74b6d87dd663258b9af31/safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1", size = 616154, upload-time = "2025-08-08T13:13:45.096Z" },
+    { url = "https://files.pythonhosted.org/packages/98/c6/affb0bd9ce02aa46e7acddbe087912a04d953d7a4d74b708c91b5806ef3f/safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda", size = 520713, upload-time = "2025-08-08T13:13:46.25Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/5d/5a514d7b88e310c8b146e2404e0dc161282e78634d9358975fd56dfd14be/safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f", size = 485835, upload-time = "2025-08-08T13:13:49.373Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/7b/4fc3b2ba62c352b2071bea9cfbad330fadda70579f617506ae1a2f129cab/safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19", size = 521503, upload-time = "2025-08-08T13:13:47.651Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/50/0057e11fe1f3cead9254315a6c106a16dd4b1a19cd247f7cc6414f6b7866/safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce", size = 652256, upload-time = "2025-08-08T13:13:53.167Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/29/473f789e4ac242593ac1656fbece6e1ecd860bb289e635e963667807afe3/safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7", size = 747281, upload-time = "2025-08-08T13:13:54.656Z" },
+    { url = "https://files.pythonhosted.org/packages/68/52/f7324aad7f2df99e05525c84d352dc217e0fa637a4f603e9f2eedfbe2c67/safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5", size = 692286, upload-time = "2025-08-08T13:13:55.884Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" },
+    { url = "https://files.pythonhosted.org/packages/59/a7/e2158e17bbe57d104f0abbd95dff60dda916cf277c9f9663b4bf9bad8b6e/safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1", size = 308926, upload-time = "2025-08-08T13:14:01.095Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" },
+]
+[[package]]
+name = "sensenova-u1"
+version = "0.1.0"
+source = { editable = "." }
+dependencies = [
+    { name = "accelerate" },
+    { name = "httpx" },
+    { name = "huggingface-hub" },
+    { name = "numpy" },
+    { name = "packaging" },
+    { name = "pillow" },
+    { name = "pre-commit" },
+    { name = "safetensors" },
+    { name = "sentencepiece" },
+    { name = "tokenizers" },
+    { name = "torch" },
+    { name = "torchvision" },
+    { name = "tqdm" },
+    { name = "transformers" },
+]
+[package.optional-dependencies]
+dev = [
+    { name = "pre-commit" },
+    { name = "pytest" },
+    { name = "ruff" },
+]
+flash = [
+    { name = "flash-attn" },
+]
+[package.metadata]
+requires-dist = [
+    { name = "accelerate", specifier = "==1.10.1" },
+    { name = "flash-attn", marker = "extra == 'flash'", specifier = ">=2.8,<3" },
+    { name = "httpx", specifier = ">=0.27,<1" },
+    { name = "huggingface-hub", specifier = "==0.36.2" },
+    { name = "numpy", specifier = "==2.2.0" },
+    { name = "packaging", specifier = "==25.0" },
+    { name = "pillow", specifier = "==12.0.0" },
+    { name = "pre-commit", specifier = ">=4.5.1" },
+    { name = "pre-commit", marker = "extra == 'dev'" },
+    { name = "pytest", marker = "extra == 'dev'" },
+    { name = "ruff", marker = "extra == 'dev'" },
+    { name = "safetensors", specifier = "==0.6.2" },
+    { name = "sentencepiece", specifier = "==0.2.1" },
+    { name = "tokenizers", specifier = "==0.22.1" },
+    { name = "torch", specifier = "==2.8.0", index = "https://download.pytorch.org/whl/cu128" },
+    { name = "torchvision", specifier = "==0.23.0", index = "https://download.pytorch.org/whl/cu128" },
+    { name = "tqdm", specifier = "==4.67.1" },
+    { name = "transformers", specifier = "==4.57.1" },
+]
+provides-extras = ["flash", "dev"]
+[[package]]
+name = "sentencepiece"
+version = "0.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = "2025-08-12T07:00:51.718Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d8/15/46afbab00733d81788b64be430ca1b93011bb9388527958e26cc31832de5/sentencepiece-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6356d0986b8b8dc351b943150fcd81a1c6e6e4d439772e8584c64230e58ca987", size = 1942560, upload-time = "2025-08-12T06:59:25.82Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/79/7c01b8ef98a0567e9d84a4e7a910f8e7074fcbf398a5cd76f93f4b9316f9/sentencepiece-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8f8ba89a3acb3dc1ae90f65ec1894b0b9596fdb98ab003ff38e058f898b39bc7", size = 1325385, upload-time = "2025-08-12T06:59:27.722Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/88/2b41e07bd24f33dcf2f18ec3b74247aa4af3526bad8907b8727ea3caba03/sentencepiece-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02593eca45440ef39247cee8c47322a34bdcc1d8ae83ad28ba5a899a2cf8d79a", size = 1253319, upload-time = "2025-08-12T06:59:29.306Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/54/38a1af0c6210a3c6f95aa46d23d6640636d020fba7135cd0d9a84ada05a7/sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a0d15781a171d188b661ae4bde1d998c303f6bd8621498c50c671bd45a4798e", size = 1316162, upload-time = "2025-08-12T06:59:30.914Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/66/fb191403ade791ad2c3c1e72fe8413e63781b08cfa3aa4c9dfc536d6e795/sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f5a3e0d9f445ed9d66c0fec47d4b23d12cfc858b407a03c194c1b26c2ac2a63", size = 1387785, upload-time = "2025-08-12T06:59:32.491Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/2d/3bd9b08e70067b2124518b308db6a84a4f8901cc8a4317e2e4288cdd9b4d/sentencepiece-0.2.1-cp311-cp311-win32.whl", hash = "sha256:6d297a1748d429ba8534eebe5535448d78b8acc32d00a29b49acf28102eeb094", size = 999555, upload-time = "2025-08-12T06:59:34.475Z" },
+    { url = "https://files.pythonhosted.org/packages/32/b8/f709977f5fda195ae1ea24f24e7c581163b6f142b1005bc3d0bbfe4d7082/sentencepiece-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:82d9ead6591015f009cb1be1cb1c015d5e6f04046dbb8c9588b931e869a29728", size = 1054617, upload-time = "2025-08-12T06:59:36.461Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/40/a1fc23be23067da0f703709797b464e8a30a1c78cc8a687120cd58d4d509/sentencepiece-0.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:39f8651bd10974eafb9834ce30d9bcf5b73e1fc798a7f7d2528f9820ca86e119", size = 1033877, upload-time = "2025-08-12T06:59:38.391Z" },
+]
+[[package]]
+name = "setuptools"
+version = "82.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" },
+]
+[[package]]
+name = "sympy"
+version = "1.14.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mpmath" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" },
+]
+[[package]]
+name = "tokenizers"
+version = "0.22.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "huggingface-hub" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/46/fb6854cec3278fbfa4a75b50232c77622bc517ac886156e6afbfa4d8fc6e/tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9", size = 363123, upload-time = "2025-09-19T09:49:23.424Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bf/33/f4b2d94ada7ab297328fc671fed209368ddb82f965ec2224eb1892674c3a/tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73", size = 3069318, upload-time = "2025-09-19T09:49:11.848Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/58/2aa8c874d02b974990e89ff95826a4852a8b2a273c7d1b4411cdd45a4565/tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc", size = 2926478, upload-time = "2025-09-19T09:49:09.759Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/3b/55e64befa1e7bfea963cf4b787b2cea1011362c4193f5477047532ce127e/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a", size = 3256994, upload-time = "2025-09-19T09:48:56.701Z" },
+    { url = "https://files.pythonhosted.org/packages/71/0b/fbfecf42f67d9b7b80fde4aabb2b3110a97fac6585c9470b5bff103a80cb/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7", size = 3153141, upload-time = "2025-09-19T09:48:59.749Z" },
+    { url = "https://files.pythonhosted.org/packages/17/a9/b38f4e74e0817af8f8ef925507c63c6ae8171e3c4cb2d5d4624bf58fca69/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21", size = 3508049, upload-time = "2025-09-19T09:49:05.868Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/48/dd2b3dac46bb9134a88e35d72e1aa4869579eacc1a27238f1577270773ff/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214", size = 3710730, upload-time = "2025-09-19T09:49:01.832Z" },
+    { url = "https://files.pythonhosted.org/packages/93/0e/ccabc8d16ae4ba84a55d41345207c1e2ea88784651a5a487547d80851398/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f", size = 3412560, upload-time = "2025-09-19T09:49:03.867Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/c6/dc3a0db5a6766416c32c034286d7c2d406da1f498e4de04ab1b8959edd00/tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4", size = 3250221, upload-time = "2025-09-19T09:49:07.664Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/a6/2c8486eef79671601ff57b093889a345dd3d576713ef047776015dc66de7/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879", size = 9345569, upload-time = "2025-09-19T09:49:14.214Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/16/32ce667f14c35537f5f605fe9bea3e415ea1b0a646389d2295ec348d5657/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446", size = 9271599, upload-time = "2025-09-19T09:49:16.639Z" },
+    { url = "https://files.pythonhosted.org/packages/51/7c/a5f7898a3f6baa3fc2685c705e04c98c1094c523051c805cdd9306b8f87e/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a", size = 9533862, upload-time = "2025-09-19T09:49:19.146Z" },
+    { url = "https://files.pythonhosted.org/packages/36/65/7e75caea90bc73c1dd8d40438adf1a7bc26af3b8d0a6705ea190462506e1/tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390", size = 9681250, upload-time = "2025-09-19T09:49:21.501Z" },
+    { url = "https://files.pythonhosted.org/packages/30/2c/959dddef581b46e6209da82df3b78471e96260e2bc463f89d23b1bf0e52a/tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82", size = 2472003, upload-time = "2025-09-19T09:49:27.089Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/46/e33a8c93907b631a99377ef4c5f817ab453d0b34f93529421f42ff559671/tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138", size = 2674684, upload-time = "2025-09-19T09:49:24.953Z" },
+]
+[[package]]
+name = "torch"
+version = "2.8.0+cu128"
+source = { registry = "https://download.pytorch.org/whl/cu128" }
+dependencies = [
+    { name = "filelock" },
+    { name = "fsspec" },
+    { name = "jinja2" },
+    { name = "networkx" },
+    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "sympy" },
+    { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "typing-extensions" },
+]
+wheels = [
+    { url = "https://download-r2.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:039b9dcdd6bdbaa10a8a5cd6be22c4cb3e3589a341e5f904cbb571ca28f55bed" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:34c55443aafd31046a7963b63d30bc3b628ee4a704f826796c865fdfd05bb596" },
+]
+[[package]]
+name = "torchvision"
+version = "0.23.0+cu128"
+source = { registry = "https://download.pytorch.org/whl/cu128" }
+dependencies = [
+    { name = "numpy" },
+    { name = "pillow" },
+    { name = "torch" },
+]
+wheels = [
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.23.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:93f1b5f56b20cd6869bca40943de4fd3ca9ccc56e1b57f47c671de1cdab39cdb" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.23.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:70b3d8bfe04438006ec880c162b0e3aaac90c48b759aa41638dd714c732b182c" },
+]
+[[package]]
+name = "tqdm"
+version = "4.67.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
+]
+[[package]]
+name = "transformers"
+version = "4.57.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "huggingface-hub" },
+    { name = "numpy" },
+    { name = "packaging" },
+    { name = "pyyaml" },
+    { name = "regex" },
+    { name = "requests" },
+    { name = "safetensors" },
+    { name = "tokenizers" },
+    { name = "tqdm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d6/68/a39307bcc4116a30b2106f2e689130a48de8bd8a1e635b5e1030e46fcd9e/transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55", size = 10142511, upload-time = "2025-10-14T15:39:26.18Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" },
+]
+[[package]]
+name = "triton"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "setuptools" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload-time = "2025-07-30T19:58:29.908Z" },
+]
+[[package]]
+name = "typing-extensions"
+version = "4.15.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
+]
+[[package]]
+name = "urllib3"
+version = "2.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+]
+[[package]]
+name = "virtualenv"
+version = "21.2.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "distlib" },
+    { name = "filelock" },
+    { name = "platformdirs" },
+    { name = "python-discovery" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0c/98/3a7e644e19cb26133488caff231be390579860bbbb3da35913c49a1d0a46/virtualenv-21.2.4.tar.gz", hash = "sha256:b294ef68192638004d72524ce7ef303e9d0cf5a44c95ce2e54a7500a6381cada", size = 5850742, upload-time = "2026-04-14T22:15:31.438Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/27/8d/edd0bd910ff803c308ee9a6b7778621af0d10252219ad9f19ef4d4982a61/virtualenv-21.2.4-py3-none-any.whl", hash = "sha256:29d21e941795206138d0f22f4e45ff7050e5da6c6472299fb7103318763861ac", size = 5831232, upload-time = "2026-04-14T22:15:29.342Z" },
+]
--- a/doc/1.png
+++ b/doc/1.png
--- a/doc/answer.txt
+++ b/doc/answer.txt
+<think>
+The user wants a meal recommendation for two people based on the provided menu.
+**Key constraints:**
+-   **People:** 2.
+-   **Goal:** Balanced meal (mains + starter/dessert).
+-   **Budget:** Budget-conscious but wants highlights.
+**Menu Analysis:**
+-   **Restaurant:** Samsen (Thai Street Food).
+-   **Sections:**
+    -   *Skewers + Sides:* Char-grilled chicken satay, Thai watercress, Thai jasmine rice.
+    -   *Salads:* Char-grilled eggplant & tiger prawn salad, Chopped duck & Thai herb salad, Pounded green papaya salad.
+    -   *Curries:* Khao Soi (chicken/beef), Massaman (beef), Spicy turmeric curry (tiger prawns), Aromatic curry of half a chicken & potatoes.
+    -   *Wok Stir-fry:* Fried rice of Wagyu beef, Crabmeat & spring onions, Pad Thai of tiger prawns, Stir fried spicy Wagyu beef & flat noodles, Stir fried oyster sauce & flat noodles, Wagyu beef wok fried, Thai omelette.
+    -   *Sweet Roti:* Fresh made roti with Nutella/condensed milk, Fresh made banana Roti, Toppings.
+    -   *Desserts:* Mango sticky rice, Coconut ice cream, Thai red tea ice cream, Pandan ice cream, Pandan & coconut dumplings.
+**Budget Calculation (Mental Check):**
+-   Prices are in HKD. Let's estimate values.
+-   Cheap: Watercress (88), Papaya salad (118), Rice (30), Roti toppings (40).
+-   Mid-range: Satay (138), Eggplant salad (158), Curry (158-258), Wok fried rice/beef (158-178).
+-   Expensive: Wagyu beef items (158-198), Massaman (258), Spicy turmeric curry (248).
+**Strategy:**
+1.  **Starters/Sides:** Need something light and shareable. The "Thai jasmine rice" is cheap (30) but maybe too plain as a main starter? Actually, it's listed under Skewers + Sides. Let's look at the Salads. "Pounded green papaya salad" is a classic Thai starter (118). Or "Chopped duck & Thai herb salad" (138).
+2.  **Main Mains:** Need substantial food.
+    -   *Option A (Budget Focus):* Stick to the standard curries and wok stir fries.
+        -   "Khao Soi" (158) or "Massaman" (258 - maybe too expensive for just two people to split comfortably if they want to try multiple things). Let's skip Massaman for budget.
+        -   "Spicy turmeric curry of tiger prawns" (248) - Maybe too expensive for one person to eat alone, but great for sharing.
+        -   "Aromatic curry of half a chicken & potatoes" (258) - Also pricey.
+        -   "Fried rice of Wagyu beef" (158) - Good value.
+        -   "Pad Thai of tiger prawns" (158) - Classic, good value.
+        -   "Stir fried spicy Wagyu beef & flat noodles" (158) - Good value.
+    -   *Option B (Highlight Focus):* Try the "Wagyu" items since that seems to be a signature (multiple Wagyu dishes). But they are all around 158-198. That's getting tight.
+    -   *Option C (Balanced/Mix):*
+        -   One person gets a Wok Stir Fry (e.g., Wagyu Beef Wok Fried or Spicy Wagyu Beef).
+        -   One person gets a Curry (e.g., Khao Soi).
+        -   Share a Salad.
+        -   Share a dessert.
+**Refining the Recommendation:**
+*   **Starter:** "Pounded green papaya salad with toasted peanuts" (118 HKD). It's a classic Thai appetizer, light, and fits the "budget-conscious" vibe well. Alternatively, the "Thai jasmine rice" (30) is very cheap but maybe too simple. Let's suggest the Papaya salad.
+*   **Mains (The "Highlights"):**
+    *   *Dish 1:* "Fried rice of Wagyu beef, tomatoes & chilli" (158 HKD) OR "Stir fried spicy Wagyu beef & flat noodles" (158 HKD). These are good value for money (Wagyu is expensive but the price is reasonable for a restaurant). Let's pick the **Fried Rice of Wagyu Beef** as it's a solid main.
+    *   *Dish 2:* "Khao Soi of curried chicken with fresh egg noodles" (158 HKD). This is a classic Northern Thai dish and a great "highlight." It's affordable compared to the Massaman.
+    *   *Alternative Main:* If they really want to try the "Wagyu" theme, maybe get the "Wagyu beef wok fried" (158) and the "Pad Thai of tiger prawns" (158). That's 316 HKD for two mains. Plus rice/salad. That might be too much.
+    *   *Better Balance:* Let's suggest one Curry and one Wok Stir Fry.
+        -   **Curry:** "Khao Soi" (158) or "Massaman" (258). Massaman is richer and sweeter, often considered a highlight. At 258, it's a bit pricier but worth it if they want to splurge once. Let's offer it as an option.
+        -   **Wok Stir Fry:** "Pad Thai of tiger prawns" (158) is a safe bet. Or "Stir fried oyster sauce & flat noodles" (148) - cheaper.
+*   **Dessert:**
+    *   "Mango sticky rice" (98) is the classic. Good value.
+    *   "Coconut ice cream" (88) is also good.
+    *   Let's suggest splitting the **Mango Sticky Rice** (98 HKD) as it's iconic.
+*   **Rice:** The menu lists "Thai jasmine rice" for 30 HKD. It's cheap! I should definitely suggest ordering this separately or asking if it's free (though it says 30). Wait, looking closely at the menu, "Thai jasmine rice" is listed under Skewers + Sides for 30.00. That's incredibly cheap. I will recommend adding this to make it a full meal.
+**Drafting the Plan:**
+*   **Plan A (The "Classic & Comfortable" Mix):**
+    *   Starter: Pounded Green Papaya Salad (118)
+    *   Main 1: Fried Rice of Wagyu Beef (158) - Shows off the premium ingredient.
+    *   Main 2: Khao Soi (158) - Classic Northern Thai highlight.
+    *   Dessert: Mango Sticky Rice (98)
+    *   Rice: Thai Jasmine Rice (30)
+    *   Total approx: 562 HKD. (Very balanced).
+*   **Plan B (The "Wagyu & Spicy" Thrill):**
+    *   Main 1: Stir fried spicy Wagyu beef & flat noodles (158)
+    *   Main 2: Spicy turmeric curry of tiger prawns (248) - This is a "highlight" but expensive.
+    *   Dessert: Pandan Ice Cream (88)
+    *   Rice: Free/Included? (Usually rice is included, but here it's priced. Let's assume you pay for it if you want extra). Actually, usually in these spots, you order the dish which comes with rice/noodles. I shouldn't double count rice unless specified. I'll assume the mains come with their starches.
+Let's stick to the most logical "Budget-Conscious Highlight" combo.
+**Selected Combo:**
+1.  **Appetizer:** Pounded Green Papaya Salad (118 HKD). It's a staple, light, and not too expensive.
+2.  **Main Course 1 (The Highlight):** Fried Rice of Wagyu Beef (158 HKD). It uses the expensive meat but is priced reasonably, showing good value.
+3.  **Main Course 2 (The Classic):** Khao Soi (158 HKD). It's a signature dish but cheaper than the Massaman (258).
+4.  **Dessert:** Mango Sticky Rice (98 HKD). The quintessential Thai sweet ending.
+5.  **Sides:** Thai Jasmine Rice (30 HKD). Just in case you need extra carbs!
+**Wait, let me re-read the prices.**
+-   Pounded Green Papaya Salad: 118
+-   Fried Rice of Wagyu: 158
+-   Khao Soi: 158
+-   Mango Sticky Rice: 98
+-   Total: ~532 HKD (~562 HKD with the 30 HKD jasmine rice).
+Is there a cheaper option?
+-   Spicy Turmeric Curry (248) is too high.
+-   Massaman (258) is too high.
+-   Spicy Oyster Sauce Noodles (148) is cheaper than Wagyu.
+**Revised Recommendation:**
+To keep it strictly "budget-conscious" while hitting highlights:
+-   **Starter:** Chopped Duck & Thai Herb Salad (138) OR Pounded Green Papaya (118). Let's go with Papaya.
+-   **Main 1:** Pad Thai of Tiger Prawns (158). It's a classic, comes with noodles, and features seafood.
+-   **Main 2:** Fried Rice of Wagyu Beef (158). Features the premium meat.
+-   **Dessert:** Coconut Ice Cream (88) or Mango Sticky Rice (98).
+-   **Rice:** Thai Jasmine Rice (30).
+Actually, looking at the "Curries" section, the "Khao Soi" is 158. The "Massaman" is 258. The "Spicy Turmeric" is 248.
+The "Wok" section has "Stir fried oyster sauce" for 148.
+Let's offer a few options so they can choose.
+**Option 1: The "Wagyu & Curry" Powerhouse**
+-   Main 1: Fried Rice of Wagyu Beef (158)
+-   Main 2: Khao Soi (158)
+-   Side: Pounded Green Papaya Salad (118)
+-   Dessert: Mango Sticky Rice (98)
+-   Total: ~532 HKD.
+**Option 2: The "Seafood & Spicy" Kick**
+-   Main 1: Pad Thai of Tiger Prawns (158)
+-   Main 2: Spicy Turmeric Curry of Tiger Prawns (248) -> Maybe too expensive for one person? No, it's a shared dish.
+-   Side: Thai Jasmine Rice (30)
+-   Dessert: Pandan Ice Cream (88)
+-   Total: ~524 HKD.
+**Option 3: The Strict Budget Pick (Under 500)**
+-   Main 1: Stir fried oyster sauce & flat noodles (148)
+-   Main 2: Fried rice of Wagyu beef (158)
+-   Side: Thai Jasmine Rice (30)
+-   Dessert: Coconut Ice Cream (88)
+-   Total: ~424 HKD.
+-   *Critique:* Misses the "highlight" aspect of the curries.
+**Final Polish:**
+I will recommend a mix that balances the "Wagyu" (premium) with a classic Curry and a light salad. I will explicitly mention the price in HKD.
+**Specific Dish Recommendations:**
+1.  **Starter:** *Pounded green papaya salad with toasted peanuts* (118 HKD). It's a classic Thai appetizer that isn't too heavy.
+2.  **Main 1 (The Premium Pick):** *Fried rice of Wagyu beef, tomatoes & chilli* (158 HKD). This gives you the "Wagyu" experience which is likely the restaurant's selling point, but it's priced reasonably.
+3.  **Main 2 (The Classic Highlight):** *Khao Soi of curried chicken with fresh egg noodles* (158 HKD). This is a famous Northern Thai dish. It's a great alternative to the more expensive Massaman curry.
+4.  **Dessert:** *Mango sticky rice* (98 HKD). The perfect sweet ending.
+5.  **Extra Carbs:** *Thai jasmine rice* (30 HKD). It's surprisingly cheap!
+Total estimated cost: ~562 HKD (approx $72 USD / $100 CAD depending on exchange rate). This feels right for "budget-conscious but highlights."
+I should also mention the *Spicy Turmeric Curry* (248) as a "splurge" option if they really want to try the most expensive item, but advise against it for a strict budget. And the *Massaman* (258) is also a strong contender for a highlight but expensive.
+Let's refine the "Main 2". If they want to try the "Wagyu" theme, maybe get the *Stir fried spicy Wagyu beef & flat noodles* (158) instead of the fried rice? They are similar price. Let's stick with the Fried Rice as it's a more distinct "main".
+Wait, looking at the menu again.
+"Fresh made Roti" options.
+Maybe add a Roti?
+"Fresh made roti with Nutella or condensed milk" (78).
+If they want dessert, maybe swap the Mango Sticky Rice for a Roti? No, dessert is better.
+Let's stick to the plan.
+**Structure of Response:**
+1.  Acknowledge the restaurant (Samsen) and its vibe (Thai Street Food).
+2.  Provide a specific "Combo" recommendation.
+3.  Break down the items (Starter, Main 1, Main 2, Dessert).
+4.  Explain *why* these choices were made (balance, budget, highlights).
+5.  Give a total estimated cost.
+6.  Offer alternatives (e.g., if they really want the Massaman).
+**One detail:** The menu says "All prices are in HKD dollars".
+The "Thai Jasmine Rice" is 30.00.
+The "Pounded green papaya salad" is 118.00.
+The "Fried rice of Wagyu" is 158.00.
+The "Khao Soi" is 158.00.
+The "Mango sticky rice" is 98.00.
+Total: 118 + 158 + 158 + 98 + 30 = 562 HKD.
+Is there anything cheaper?
+"Stir fried oyster sauce & flat noodles" is 148.
+"Chopped duck & Thai herb salad" is 138.
+If they want to save money:
+-   Swap Khao Soi for Oyster Sauce Noodles (148 vs 158 - negligible difference).
+-   Swap Papaya Salad for... actually Papaya is pretty cheap for a salad.
+Okay, the plan is solid. I will present the "Balanced Highlights" combo.
+**One final check on dietary icons:**
+-   Papaya salad has a circle with a line through it (Gluten free? No, usually that's GF. Wait, the legend at bottom right says: Circle with X = Can be made Gluten-free upon request. Circle with slash = No Gluten? No, let's look closer.
+-   Bottom right:
+    -   X in circle: Can be made Gluten-free upon request.
+    -   Slash in circle: No Gluten? No, wait.
+    -   Let's look at the text: "Can be made Gluten-free upon request" next to the X symbol.
+    -   There is a symbol that looks like a crossed out nut or something?
+    -   Ah, top left corner text: "Please inform us of any allergies or dietary requirements."
+    -   Bottom right text: "Can be made Gluten-free upon request" next to the X symbol.
+    -   Wait, the symbol next to "Chopped duck & Thai herb salad" is a circle with a line through it (prohibited). What does it mean?
+    -   Usually, a circle with a slash means "Not available" or "Vegetarian"? No, "Vegetarian menu available upon request" is written separately.
+    -   Let's look at the symbols on the dishes.
+        -   "Char-grilled coconut & chilli prawn skewers": Has a circle with an X inside? No, it looks like a circle with a dot or something. Actually, looking very closely at crop 1, "char-grilled coconut & chilli prawn skewers" has a symbol that looks like a circle with a cross (X) inside? No, it looks like a circle with a diagonal line.
+        -   Let's look at the legend again.
+        -   Bottom Right:
+            -   Symbol: Circle with X inside. Text: "Can be made Gluten-free upon request".
+            -   Symbol: Circle with a slash (prohibited sign). Text: "No Gluten"? No, I don't see that text clearly.
+            -   Wait, let's look at the text "Can be made Gluten-free upon request". Next to it is the X symbol.
+            -   Is there another symbol?
+            -   Ah, look at "Chopped duck & Thai herb salad". There is a symbol next to it. It looks like a circle with a line through it.
+            -   Look at "Pounded green papaya salad". Same symbol.
+            -   Look at "Pad Thai of tiger prawns". Same symbol.
+            -   Look at "Stir fried spicy Wagyu beef". No symbol.
+            -   Look at "Thai omelette". Symbol.
+            -   Look at "Fresh made Roti". Symbol.
+            -   Look at "Desserts". All have the X symbol.
+            -   Okay, the legend at the bottom right is cut off or hard to read.
+            -   "Can be made Gluten-free upon request" is next to the X symbol.
+            -   Is the other symbol "Vegetarian"? No, "Vegetarian menu available upon request" is separate text.
+            -   Maybe the other symbol means "Contains Nuts"? Or "Contains Shellfish"?
+            -   Top left text: "Be advised that all food may contain traces of nuts & shellfish."
+            -   Let's assume the symbol next to the dishes is "Contains [Something]".
+            -   Actually, let's look at the "X" symbol again.
+            -   "Can be made Gluten-free upon request" is next to the X.
+            -   Wait, usually the X means "Contains Gluten".
+            -   Let's re-read the bottom right carefully.
+            -   "Can be made Gluten-free upon request" is next to the X symbol.
+            -   This implies the X means "Contains Gluten" and you can ask for it without.
+            -   Then what is the other symbol?
+            -   Look at "Chopped duck & Thai herb salad". The symbol is a circle with a slash.
+            -   Maybe that means "No Gluten"? No, that doesn't make sense with the "Can be made GF" text.
+            -   Let's look at the text "Can be made Gluten-free upon request". It is placed next to the X symbol.
+            -   Maybe the X symbol *means* "Contains Gluten".
+            -   And the other symbol (circle with slash) means... ?
+            -   Let's look at the very bottom right.
+            -   There is a symbol that looks like a nut? No.
+            -   Let's look at the text "Can be made Gluten-free upon request".
+            -   Actually, looking really closely at the bottom right corner...
+            -   There is a symbol: Circle with a cross (X).
+            -   Text next to it: "Can be made Gluten-free upon request".
+            -   This is confusing. Usually, if a dish *is* gluten free, it has a GF symbol. If it *contains* gluten, it has a G symbol.
+            -   Maybe the text means: "If you see this X, we can make it GF."
+            -   Let's assume the X indicates "Contains Gluten".
+            -   Then what is the other symbol?
+            -   Look at "Chopped duck & Thai herb salad". The symbol is a circle with a diagonal line.
+            -   Look at "Pounded green papaya salad". Same symbol.
+            -   Look at "Pad Thai of tiger prawns". Same symbol.
+            -   Look at "Thai omelette". Same symbol.
+            -   Look at "Fresh made Roti". Same symbol.
+            -   Look at "Desserts". All have the X symbol.
+            -   Okay, let's look at the text again.
+            -   "Can be made Gluten-free upon request" is next to the X.
+            -   Wait, is it possible the text is "Contains Gluten" and the X is just a marker?
+            -   Let's look at the symbol next to "Chopped duck". It's a circle with a line through it.
+            -   Let's look at the symbol next to "Pad Thai". It's a circle with a line through it.
+            -   Let's look at the symbol next to "Thai omelette". It's a circle with a line through it.
+            -   Let's look at the symbol next to "Fresh made Roti". It's a circle with a line through it.
+            -   Let's look at the symbol next to "Desserts". It's a circle with an X inside.
+            -   This is tricky. Let's look at the top left text again. "Please inform us of any allergies...".
+            -   Let's assume the standard iconography.
+            -   Usually:
+                -   GF symbol (circle with G).
+                -   NUT symbol.
+                -   SHELLFISH symbol.
+            -   Here, the text "Can be made Gluten-free upon request" is next to the X symbol.
+            -   Maybe the X symbol *means* "Contains Gluten" and the note explains that it's flexible.
+            -   And the other symbol (circle with slash)... could it be "Contains Pork"? (Since it's a Thai place, maybe pork is restricted? No, "Chopped duck" is there).
+            -   Could it be "Contains Shellfish"? "Be advised that all food may contain traces of nuts & shellfish."
+            -   Let's look at the symbol next to "Chopped duck". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Pad Thai". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Thai omelette". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Fresh made Roti". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Desserts". It looks like a circle with an X.
+            -   Okay, let's look at the bottom right text again.
+            -   "Can be made Gluten-free upon request" is next to the X symbol.
+            -   Wait, is it possible the text is "Contains Gluten" and the X is the symbol for "Contains"?
+            -   No, the text is explicit: "Can be made Gluten-free upon request".
+            -   This implies the symbol indicates something that *can* be removed.
+            -   So the X symbol means "Contains Gluten".
+            -   Then what is the other symbol?
+            -   Maybe the other symbol means "Contains Nuts"?
+            -   Or maybe "Contains Shellfish"?
+            -   The text at the top says "traces of nuts & shellfish".
+            -   Let's assume the circle with the slash means "Contains [Common Allergen]".
+            -   Actually, looking very closely at crop 1, the symbol next to "Chopped duck" looks like a circle with a 'V' or a checkmark? No, it's a circle with a diagonal line.
+            -   Let's look at the symbol next to "Pad Thai". It's a circle with a diagonal line.
+            -   Let's look at the symbol next to "Thai omelette". It's a circle with a diagonal line.
+            -   Let's look at the symbol next to "Fresh made Roti". It's a circle with a diagonal line.
+            -   Let's look at the symbol next to "Desserts". It's a circle with an X.
+            -   Okay, let's look at the bottom right corner again.
+            -   There is a symbol: Circle with a cross (X).
+            -   Text: "Can be made Gluten-free upon request".
+            -   There is another symbol below it? No.
+            -   Wait, look at the very bottom right.
+            -   "Can be made Gluten-free upon request" is next to the X symbol.
+            -   Is there a symbol that means "Contains Gluten"?
+            -   Maybe the circle with the slash means "Contains Gluten"?
+            -   If the circle with the slash means "Contains Gluten", then why does the text say "Can be made GF"?
+            -   Maybe the text applies to *all* dishes with the X symbol?
+            -   Let's look at the symbol next to "Chopped duck". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Pad Thai". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Thai omelette". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Fresh made Roti". It looks like a circle with a slash.
+            -   Let's look at the symbol next to "Desserts". It looks like a circle with an X.
+            -   This is confusing. Let's ignore the specific allergen meanings unless I'm sure, and focus on the budget/highlights.
+            -   Actually, let's look at the symbol next to "Chopped duck" again. It looks like a circle with a line through it.
+            -   Let's look at the symbol next to "Pad Thai". It looks like a circle with a line through it.
+            -   Let's look at the symbol next to "Thai omelette". It looks like a circle with a line through it.
+            -   Let's look at the symbol next to "Fresh made Roti". It looks like a circle with a line through it.
+            -   Let's look at the symbol next to "Desserts". It looks like a circle with an X.
+            -   Okay, let's assume the X symbol means "Contains Gluten" (standard ISO symbol is a G, but maybe they use X for "Contains"? No, that's unlikely).
+            -   Standard ISO symbols:
+                -   GF (Circle with G)
+                -   NUT (Circle with Nut)
+                -   SHELLFISH (Circle with Shellfish)
+            -   Here, the text "Can be made Gluten-free upon request" is next to the X symbol.
+            -   This suggests the X symbol *identifies* dishes that contain gluten.
+            -   So, dishes with the X symbol (like Desserts, Roti, etc.) contain gluten.
+            -   What about the circle with the slash?
+            -   Maybe it means "Contains Pork"? (If they are Halal? No, "Thailand to bowls").
+            -   Maybe it means "Contains Shellfish"? (Text says "traces of nuts & shellfish").
+            -   Let's look at the symbol next to "Chopped duck". It's a circle with a slash.
+            -   Let's look at the symbol next to "Pad Thai". It's a circle with a slash.
+            -   Let's look at the symbol next to "Thai omelette". It's a circle with a slash.
+            -   Let's look at the symbol next to "Fresh made Roti". It's a circle with a slash.
+            -   Let's look at the symbol next to "Desserts". It's a circle with an X.
+            -   Okay, I will assume the circle with the slash means "Contains [Something common]", and the X means "Contains Gluten".
+            -   Actually, looking at the bottom right, the text "Can be made Gluten-free upon request" is next to the X symbol.
+            -   And there is a symbol that looks like a nut? No.
+            -   Let's just stick to the menu items and general recommendations. I won't overanalyze the allergen symbols unless necessary. The user didn't ask about allergies specifically, just "balanced meal" and "budget-conscious".
+**Final Plan:**
+1.  **Starter:** Pounded Green Papaya Salad (118 HKD). Classic, light, cheap.
+2.  **Main 1:** Fried Rice of Wagyu Beef (158 HKD). Highlights the "Wagyu" theme, good value.
+3.  **Main 2:** Khao Soi (158 HKD). Classic Northern Thai curry, affordable highlight.
+4.  **Dessert:** Mango Sticky Rice (98 HKD). Classic sweet ending.
+5.  **Extra:** Thai Jasmine Rice (30 HKD).
+6.  **Total:** ~562 HKD.
+I will formulate the response now. I will break it down by course and explain the choices. I'll also mention the "Massaman" and "Spicy Turmeric" as optional splurges.
+One detail: "Thai Jasmine Rice" is 30.00. Is that per person or for the table? Usually per person. I'll assume per person.
+So 30 x 2 = 60.
+Total: 118 + 158 + 158 + 98 + 60 = 592 HKD.
+Still reasonable.
+Wait, looking at the menu, "Thai Jasmine Rice" is under "Skewers + Sides".
+"Thai jasmine rice 30.00".
+It's likely a side dish you order to go with skewers.
+But you can order it as a standalone rice dish.
+I'll include it as an option ("and don't forget the cheap rice!").
+Let's write the response.
+-   Start with a friendly greeting.
+-   Present the "Golden Ratio" combo.
+-   List items with prices.
+-   Add a "Why this works" section.
+-   Add a "Budget Alternative" section (if they want to save money).
+-   Mention the "Splurge" options (Massaman).
+Check prices one last time.
+-   Pounded green papaya salad: 118.00
+-   Fried rice of Wagyu beef: 158.00
+-   Khao Soi: 158.00
+-   Mango sticky rice: 98.00
+-   Thai jasmine rice: 30.00
+Total: 562 HKD.
+Looks good. I will structure the answer clearly.
+I should mention that prices are in HKD.
+I should mention that "Samsen" is a Thai street food spot.
+I should mention the "Wagyu" aspect is a key highlight.
+One small correction: "Khao Soi" is 158. "Massaman" is 258.
+"Spicy turmeric curry" is 248.
+"Aromatic curry of half a chicken" is 258.
+These are the expensive curries.
+The "Wok" section:
+-   Fried rice of Wagyu: 158
+-   Crabmeat & spring onions: 158
+-   Pad Thai of tiger prawns: 158
+-   Stir fried spicy Wagyu: 158
+-   Stir fried oyster sauce: 148
+-   Wagyu beef wok fried: 158
+-   Thai omelette: 178
+So the "Wagyu" items are consistently around 158-198.
+The "Curry" items vary wildly (158 to 258).
+So picking the 158 Curry (Khao Soi) is a smart budget move to try a curry without breaking the bank.
+The plan is solid.
+</think>
+Based on the menu for **Samson** (a Thai Street Food spot), here is a balanced, budget-friendly recommendation for two people that hits the "highlights" without breaking the bank.
+### The "Balanced Highlights" Combo
+This combination gives you a classic Thai appetizer, a premium protein main, a rich curry, and a traditional sweet ending. All prices are in **HKD**.
+**1. Starter: Pounded Green Papaya Salad (118 HKD)**
+*   **Why:** It’s a quintessential Thai appetizer. It’s light, refreshing, and significantly cheaper than the salads in the "Salads" section (which start at 138 HKD). It’s perfect for cleansing the palate before the heavier mains.
+**2. Main Course 1: Fried Rice of Wagyu Beef (158 HKD)**
+*   **Why:** This is likely the restaurant's signature "highlight." You get a taste of the expensive Wagyu beef, but at 158 HKD, it’s priced very reasonably for a restaurant portion. It offers great value.
+**3. Main Course 2: Khao Soi (158 HKD)**
+*   **Why:** This is a famous Northern Thai dish (curried chicken with fresh egg noodles). While the *Massaman* curry (258 HKD) is also a highlight, the Khao Soi is much more budget-friendly and still very flavorful. It adds a nice creamy, spicy element to the meal.
+**4. Dessert: Mango Sticky Rice (98 HKD)**
+*   **Why:** The classic Thai sweet ending. It’s a great way to finish the meal without spending too much (it is priced on par with the ice creams, which are 88-108 HKD).
+**5. Extra Carbs: Thai Jasmine Rice (30 HKD)**
+*   **Why:** Don't forget this! It’s listed for only 30 HKD. It’s a great deal to grab a couple of bowls if you need extra rice to go with the curries or wok stir-fries.
+**Total Estimated Cost:** ~**562 HKD** (approx. $72 USD)
+---
+### Alternative Options
+**If you really want to try the "Expensive" Curries:**
+You could swap the **Khao Soi** for the **Massaman Curry (258 HKD)** or the **Spicy Turmeric Curry of Tiger Prawns (248 HKD)**. These are the "wow" factor dishes, but they push the total budget higher (around 650-660 HKD). Since you mentioned being budget-conscious, I'd stick to the Khao Soi unless you really crave a super rich, slow-cooked stew.
+**If you want to try more Seafood:**
+Instead of the Fried Rice of Wagyu, you could get the **Pad Thai of Tiger Prawns (158 HKD)**. It’s a great all-in-one noodle dish that features seafood, keeping the price exactly the same as the Wagyu rice.
\ No newline at end of file
--- a/doc/ou0.png
+++ b/doc/ou0.png