Merge tag 'v0.10.0' into v0.10.0-dev

711aa9d5 · zhuwenwen · 751c492c · 6d8d0a24 · 711aa9d5 · 711aa9d5
Commit 711aa9d5 authored Jul 30, 2025 by zhuwenwen
20 changed files
--- a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh
+++ b/examples/online_serving/elastic_ep/serve_deepseek_v2.sh
+#!/bin/bash
+
+HOST="0.0.0.0"
+PORT=8006
+DATA_PARALLEL_SIZE=4
+REDUNDANT_EXPERTS=0
+LOCAL_MODEL_PATH="/models/models--deepseek-ai--DeepSeek-V2-Lite/snapshots/604d5664dddd88a0433dbae533b7fe9472482de0"
+MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --dp)
+            DATA_PARALLEL_SIZE="$2"
+            shift 2
+            ;;
+        --re)
+            REDUNDANT_EXPERTS="$2"
+            shift 2
+            ;;
+        --host)
+            HOST="$2"
+            shift 2
+            ;;
+        --port)
+            PORT="$2"
+            shift 2
+            ;;
+        --model)
+            MODEL_NAME="$2"
+            shift 2
+            ;;
+        --local-model)
+            MODEL_NAME=$LOCAL_MODEL_PATH
+            shift
+            ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo "Options:"
+            echo "  --dp SIZE                    Set data parallel size (default: 4)"
+            echo "  --re SIZE                    Set redundant experts (default: 0)"
+            echo "  --host HOST                  Set host address (default: 0.0.0.0)"
+            echo "  --port PORT                  Set port number (default: 8006)"
+            echo "  --model MODEL_NAME           Set model name or path"
+            echo "  -h, --help                   Show this help message"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            echo "Use -h or --help for usage information"
+            exit 1
+            ;;
+    esac
+done
+
+echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALLEL_SIZE and redundant experts: $REDUNDANT_EXPERTS"
+
+export RAY_DEDUP_LOGS=0
+export VLLM_USE_V1=1
+export VLLM_ALL2ALL_BACKEND="pplx"
+export VLLM_USE_DEEP_GEMM=1
+
+vllm serve $MODEL_NAME \
+    --data-parallel-size $DATA_PARALLEL_SIZE \
+    --data-parallel-size-local $DATA_PARALLEL_SIZE \
+    --data-parallel-backend ray \
+    --enforce-eager \
+    --enable-expert-parallel \
+    --enable-eplb \
+    --num-redundant-experts $REDUNDANT_EXPERTS \
+    --trust-remote-code \
+    --host $HOST \
+    --port $PORT
--- a/examples/online_serving/multi-node-serving.sh
+++ b/examples/online_serving/multi-node-serving.sh
 #!/bin/bash
+#
+# Helper script to manually start or join a Ray cluster for online serving of vLLM models.
+# This script is first executed on the head node, and then on each worker node with the IP address
+# of the head node.
+#
+# Subcommands:
+#   leader: Launches a Ray head node and blocks until the cluster reaches the expected size (head + workers).
+#   worker: Starts a worker node that connects to an existing Ray head node.
+#
+# Example usage:
+# On the head node machine, start the Ray head node process and run a vLLM server.
+#   ./multi-node-serving.sh leader --ray_port=6379 --ray_cluster_size=<SIZE> [<extra ray args>]  && \
+#   python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Meta-Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline_parallel_size 2
+# 
+# On each worker node, start the Ray worker node process.
+#   ./multi-node-serving.sh worker --ray_address=<HEAD_NODE_IP> --ray_port=6379 [<extra ray args>]
+#
+# About Ray:
+# Ray is an open-source distributed execution framework that simplifies
+# distributed computing. Learn more:
+# https://ray.io/

-subcommand=$1
-shift

-ray_port=6379
-ray_init_timeout=300
-declare -a start_params
+subcommand=$1  # Either "leader" or "worker".
+shift          # Remove the subcommand from the argument list.

+ray_port=6379              # Port used by the Ray head node.
+ray_init_timeout=300       # Seconds to wait before timing out.
+declare -a start_params    # Parameters forwarded to the underlying 'ray start' command.
+
+# Handle the worker subcommand.
 case "$subcommand" in
  worker)
    ray_address=""
@@ -32,6 +55,7 @@ case "$subcommand" in
      exit 1
    fi

+    # Retry until the worker node connects to the head node or the timeout expires.
    for (( i=0; i < $ray_init_timeout; i+=5 )); do
      ray start --address=$ray_address:$ray_port --block "${start_params[@]}"
      if [ $? -eq 0 ]; then
@@ -45,6 +69,7 @@ case "$subcommand" in
    exit 1
    ;;

+  # Handle the leader subcommand.
  leader)
    ray_cluster_size=""
    while [ $# -gt 0 ]; do
@@ -69,10 +94,10 @@ case "$subcommand" in
      exit 1
    fi

-    # start the ray daemon
+    # Start the Ray head node.
    ray start --head --port=$ray_port "${start_params[@]}"

-    # wait until all workers are active
+    # Poll Ray until every worker node is active.
    for (( i=0; i < $ray_init_timeout; i+=5 )); do
        active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'`
        if [ $active_nodes -eq $ray_cluster_size ]; then

--- a/examples/online_serving/openai_cross_encoder_score_for_multimodal.py
+++ b/examples/online_serving/openai_cross_encoder_score_for_multimodal.py
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Example online usage of Score API.
+
+Run `vllm serve <model> --task score` to start up the server in vLLM.
+"""
+
+import argparse
+import pprint
+
+import requests
+
+
+def post_http_request(prompt: dict, api_url: str) -> requests.Response:
+    """Send ``prompt`` as the JSON body of a POST request to ``api_url``.
+
+    Args:
+        prompt: Request payload; serialized by ``requests`` via ``json=``.
+        api_url: Full URL of the endpoint to call.
+
+    Returns:
+        The ``requests.Response`` as returned by ``requests.post``. The
+        status code is not checked here.
+    """
+    headers = {"User-Agent": "Test Client"}
+    response = requests.post(api_url, headers=headers, json=prompt)
+    return response
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options ``--host``, ``--port`` and ``--model``.
+
+    Returns:
+        The parsed namespace; defaults are ``localhost``, ``8000`` and
+        ``jinaai/jina-reranker-m0`` respectively.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="localhost")
+    parser.add_argument("--port", type=int, default=8000)
+    parser.add_argument("--model", type=str, default="jinaai/jina-reranker-m0")
+    return parser.parse_args()
+
+
+def main(args: argparse.Namespace) -> None:
+    """Send one score request to the server and print the request and reply.
+
+    The request pairs a plain-text ``text_1`` with a ``text_2`` made of two
+    ``image_url`` content entries, and is posted to the ``/score`` endpoint
+    built from ``args.host`` and ``args.port``.
+
+    Args:
+        args: Parsed options providing ``host``, ``port`` and ``model``.
+    """
+    api_url = f"http://{args.host}:{args.port}/score"
+    model_name = args.model
+
+    # text_1 is a plain string; text_2 is a dict holding a list of images.
+    text_1 = "slm markdown"
+    text_2 = {
+        "content": [
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png"
+                },
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
+                },
+            },
+        ]
+    }
+    prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
+    score_response = post_http_request(prompt=prompt, api_url=api_url)
+    print("\nPrompt when text_1 is string and text_2 is a image list:")
+    pprint.pprint(prompt)
+    print("\nScore Response:")
+    # The response body is decoded as JSON before printing.
+    pprint.pprint(score_response.json())
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
--- a/examples/online_serving/ray_serve_deepseek.py
+++ b/examples/online_serving/ray_serve_deepseek.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
-See more details at:
-https://docs.ray.io/en/latest/serve/tutorials/serve-deepseek.html
-And see Ray Serve LLM documentation at:
-https://docs.ray.io/en/latest/serve/llm/serving-llms.html
+Deploy DeepSeek R1 or V3 with Ray Serve LLM.
+
+Ray Serve LLM is a scalable and production-grade model serving library built
+on the Ray distributed computing framework, with first-class support for the vLLM engine.
+
+Key features:
+- Automatic scaling, back-pressure, and load balancing across a Ray cluster.
+- Unified multi-node multi-model deployment.
+- Exposes an OpenAI-compatible HTTP API.
+- Multi-LoRA support with shared base models.

-Run `python3 ray_serve_deepseek.py` to deploy the model.
+Run `python3 ray_serve_deepseek.py` to launch an endpoint.
+
+Learn more in the official Ray Serve LLM documentation:
+https://docs.ray.io/en/latest/serve/llm/serving-llms.html
 """

 from ray import serve
@@ -16,9 +24,8 @@ from ray.serve.llm import LLMConfig, build_openai_app
 llm_config = LLMConfig(
    model_loading_config={
        "model_id": "deepseek",
-        # Since DeepSeek model is huge, it is recommended to pre-download
-        # the model to local disk, say /path/to/the/model and specify:
-        # model_source="/path/to/the/model"
+        # Pre-downloading the model to local storage is recommended since
+        # the model is large. Set model_source="/path/to/the/model".
        "model_source": "deepseek-ai/DeepSeek-R1",
    },
    deployment_config={
@@ -27,10 +34,10 @@ llm_config = LLMConfig(
            "max_replicas": 1,
        }
    },
-    # Change to the accelerator type of the node
+    # Set to the node's accelerator type.
    accelerator_type="H100",
    runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
-    # Customize engine arguments as needed (e.g. vLLM engine kwargs)
+    # Customize engine arguments as required (for example, vLLM engine kwargs).
    engine_kwargs={
        "tensor_parallel_size": 8,
        "pipeline_parallel_size": 2,
@@ -44,6 +51,6 @@ llm_config = LLMConfig(
    },
 )

-# Deploy the application
+# Deploy the application.
 llm_app = build_openai_app({"llm_configs": [llm_config]})
 serve.run(llm_app)
--- a/examples/online_serving/run_cluster.sh
+++ b/examples/online_serving/run_cluster.sh
 #!/bin/bash
+#
+# Launch a Ray cluster inside Docker for vLLM inference.
+#
+# This script can start either a head node or a worker node, depending on the
+# --head or --worker flag provided as the third positional argument.
+#
+# Usage:
+# 1. Designate one machine as the head node and execute:
+#    bash run_cluster.sh \
+#         vllm/vllm-openai \
+#         <head_node_ip> \
+#         --head \
+#         /abs/path/to/huggingface/cache \
+#         -e VLLM_HOST_IP=<head_node_ip>
+#
+# 2. On every worker machine, execute:
+#    bash run_cluster.sh \
+#         vllm/vllm-openai \
+#         <head_node_ip> \
+#         --worker \
+#         /abs/path/to/huggingface/cache \
+#         -e VLLM_HOST_IP=<worker_node_ip>
+# 
+# Each worker requires a unique VLLM_HOST_IP value.
+# Keep each terminal session open. Closing a session stops the associated Ray
+# node and thereby shuts down the entire cluster.
+# Every machine must be reachable at the supplied IP address.
+#
+# The container is named "node-<random_suffix>". To open a shell inside
+# a container after launch, use:
+#       docker exec -it node-<random_suffix> /bin/bash
+#
+# Then, you can execute vLLM commands on the Ray cluster as if it were a
+# single machine, e.g. vllm serve ...
+#
+# To stop the container, use:
+#       docker stop node-<random_suffix>

-# Check for minimum number of required arguments
+# Check for minimum number of required arguments.
 if [ $# -lt 4 ]; then
-    echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]"
+    echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]"
    exit 1
 fi

-# Assign the first three arguments and shift them away
+# Extract the mandatory positional arguments and remove them from $@.
 DOCKER_IMAGE="$1"
 HEAD_NODE_ADDRESS="$2"
-NODE_TYPE="$3"  # Should be --head or --worker
+NODE_TYPE="$3"  # Should be --head or --worker.
 PATH_TO_HF_HOME="$4"
 shift 4

-# Additional arguments are passed directly to the Docker command
+# Preserve any extra arguments so they can be forwarded to Docker.
 ADDITIONAL_ARGS=("$@")

-# Validate node type
+# Validate the NODE_TYPE argument.
 if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
    echo "Error: Node type must be --head or --worker"
    exit 1
 fi

-# Define a function to cleanup on EXIT signal
+# Generate a unique container name with random suffix.
+# Docker container names must be unique on each host.
+# The random suffix allows multiple Ray containers to run simultaneously on the same machine,
+# for example, on a multi-GPU machine.
+CONTAINER_NAME="node-${RANDOM}"
+
+# Define a cleanup routine that removes the container when the script exits.
+# This prevents orphaned containers from accumulating if the script is interrupted.
 cleanup() {
-    docker stop node
-    docker rm node
+    docker stop "${CONTAINER_NAME}"
+    docker rm "${CONTAINER_NAME}"
 }
 trap cleanup EXIT

-# Command setup for head or worker node
+# Build the Ray start command based on the node role.
+# The head node manages the cluster and accepts connections on port 6379, 
+# while workers connect to the head's address.
 RAY_START_CMD="ray start --block"
 if [ "${NODE_TYPE}" == "--head" ]; then
    RAY_START_CMD+=" --head --port=6379"
@@ -37,11 +83,15 @@ else
    RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379"
 fi

-# Run the docker command with the user specified parameters and additional arguments
+# Launch the container with the assembled parameters.
+# --network host: Allows Ray nodes to communicate directly via host networking
+# --shm-size 10.24g: Increases shared memory
+# --gpus all: Gives container access to all GPUs on the host
+# -v HF_HOME: Mounts HuggingFace cache to avoid re-downloading models
 docker run \
    --entrypoint /bin/bash \
    --network host \
-    --name node \
+    --name "${CONTAINER_NAME}" \
    --shm-size 10.24g \
    --gpus all \
    -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \

--- a/examples/others/tensorize_vllm_model.py
+++ b/examples/others/tensorize_vllm_model.py
@@ -4,6 +4,7 @@
 import argparse
 import dataclasses
 import json
+import logging
 import os
 import uuid

@@ -15,9 +16,13 @@ from vllm.model_executor.model_loader.tensorizer import (
    TensorizerConfig,
    tensorize_lora_adapter,
    tensorize_vllm_model,
+    tensorizer_kwargs_arg,
 )
 from vllm.utils import FlexibleArgumentParser

+logger = logging.getLogger()
+
+
 # yapf conflicts with isort for this docstring
 # yapf: disable
 """
@@ -119,7 +124,7 @@ vllm serve <model_path> \
 """


-def parse_args():
+def get_parser():
    parser = FlexibleArgumentParser(
        description="An example script that can be used to serialize and "
        "deserialize vLLM models. These models "
@@ -135,13 +140,13 @@ def parse_args():
        required=False,
        help="Path to a LoRA adapter to "
        "serialize along with model tensors. This can then be deserialized "
-        "along with the model by passing a tensorizer_config kwarg to "
-        "LoRARequest with type TensorizerConfig. See the docstring for this "
-        "for a usage example."
-
+        "along with the model by instantiating a TensorizerConfig object, "
+        "creating a dict from it with TensorizerConfig.to_serializable(), "
+        "and passing it to LoRARequest's initializer with the kwarg "
+        "tensorizer_config_dict."
    )

-    subparsers = parser.add_subparsers(dest='command')
+    subparsers = parser.add_subparsers(dest='command', required=True)

    serialize_parser = subparsers.add_parser(
        'serialize', help="Serialize a model to `--serialized-directory`")
@@ -171,6 +176,14 @@ def parse_args():
        "where `suffix` is given by `--suffix` or a random UUID if not "
        "provided.")

+    serialize_parser.add_argument(
+        "--serialization-kwargs",
+        type=tensorizer_kwargs_arg,
+        required=False,
+        help=("A JSON string containing additional keyword arguments to "
+              "pass to Tensorizer's TensorSerializer during "
+              "serialization."))
+
    serialize_parser.add_argument(
        "--keyfile",
        type=str,
@@ -186,9 +199,17 @@ def parse_args():
    deserialize_parser.add_argument(
        "--path-to-tensors",
        type=str,
-        required=True,
+        required=False,
        help="The local path or S3 URI to the model tensors to deserialize. ")

+    deserialize_parser.add_argument(
+        "--serialized-directory",
+        type=str,
+        required=False,
+        help="Directory with model artifacts for loading. Assumes a "
+             "model.tensors file exists therein. Can supersede "
+             "--path-to-tensors.")
+
    deserialize_parser.add_argument(
        "--keyfile",
        type=str,
@@ -196,11 +217,27 @@ def parse_args():
        help=("Path to a binary key to use to decrypt the model weights,"
              " if the model was serialized with encryption"))

-    TensorizerArgs.add_cli_args(deserialize_parser)
+    deserialize_parser.add_argument(
+        "--deserialization-kwargs",
+        type=tensorizer_kwargs_arg,
+        required=False,
+        help=("A JSON string containing additional keyword arguments to "
+              "pass to Tensorizer's `TensorDeserializer` during "
+              "deserialization."))

-    return parser.parse_args()
+    TensorizerArgs.add_cli_args(deserialize_parser)

+    return parser

+def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
+                                              cfg: TensorizerConfig) -> None:
+    """Copy matching entries of ``extra_cfg`` onto ``cfg`` in place.
+
+    For every key in ``extra_cfg`` that already exists as an attribute on
+    ``cfg``, the attribute is overwritten with the value from ``extra_cfg``
+    and an info message is logged. Keys without a matching attribute are
+    skipped silently.
+
+    Args:
+        extra_cfg: Mapping of attribute names to override values.
+        cfg: The ``TensorizerConfig`` instance to update; it is mutated.
+    """
+    for k, v in extra_cfg.items():
+        # Only override attributes that cfg already has; never add new ones.
+        if hasattr(cfg, k):
+            setattr(cfg, k, v)
+            logger.info(
+                "Updating TensorizerConfig with %s from "
+                "--model-loader-extra-config provided", k
+            )

 def deserialize(args, tensorizer_config):
    if args.lora_path:
@@ -230,7 +267,8 @@ def deserialize(args, tensorizer_config):
            lora_request=LoRARequest("sql-lora",
                                     1,
                                     args.lora_path,
-                                     tensorizer_config = tensorizer_config)
+                                     tensorizer_config_dict = tensorizer_config
+                                     .to_serializable())
            )
        )
    else:
@@ -243,7 +281,8 @@ def deserialize(args, tensorizer_config):


 def main():
-    args = parse_args()
+    parser = get_parser()
+    args = parser.parse_args()

    s3_access_key_id = (getattr(args, 's3_access_key_id', None)
                        or os.environ.get("S3_ACCESS_KEY_ID", None))
@@ -265,13 +304,24 @@ def main():
    else:
        keyfile = None

+    extra_config = {}
    if args.model_loader_extra_config:
-        config = json.loads(args.model_loader_extra_config)
-        tensorizer_args = \
-            TensorizerConfig(**config)._construct_tensorizer_args()
-        tensorizer_args.tensorizer_uri = args.path_to_tensors
-    else:
-        tensorizer_args = None
+        extra_config = json.loads(args.model_loader_extra_config)
+
+
+    tensorizer_dir = (args.serialized_directory or
+                      extra_config.get("tensorizer_dir"))
+    tensorizer_uri = (getattr(args, "path_to_tensors", None)
+                      or extra_config.get("tensorizer_uri"))
+
+    if tensorizer_dir and tensorizer_uri:
+        parser.error("--serialized-directory and --path-to-tensors "
+                     "cannot both be provided")
+
+    if not tensorizer_dir and not tensorizer_uri:
+        parser.error("Either --serialized-directory or --path-to-tensors "
+                     "must be provided")
+

    if args.command == "serialize":
        eng_args_dict = {f.name: getattr(args, f.name) for f in
@@ -281,7 +331,7 @@ def main():
            argparse.Namespace(**eng_args_dict)
        )

-        input_dir = args.serialized_directory.rstrip('/')
+        input_dir = tensorizer_dir.rstrip('/')
        suffix = args.suffix if args.suffix else uuid.uuid4().hex
        base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
        if engine_args.tensor_parallel_size > 1:
@@ -292,21 +342,29 @@ def main():
        tensorizer_config = TensorizerConfig(
            tensorizer_uri=model_path,
            encryption_keyfile=keyfile,
-            **credentials)
+            serialization_kwargs=args.serialization_kwargs or {},
+            **credentials
+        )

        if args.lora_path:
            tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
            tensorize_lora_adapter(args.lora_path, tensorizer_config)

+        merge_extra_config_with_tensorizer_config(extra_config,
+                                                  tensorizer_config)
        tensorize_vllm_model(engine_args, tensorizer_config)

    elif args.command == "deserialize":
-        if not tensorizer_args:
-            tensorizer_config = TensorizerConfig(
-                tensorizer_uri=args.path_to_tensors,
-                encryption_keyfile = keyfile,
-                **credentials
-            )
+        tensorizer_config = TensorizerConfig(
+            tensorizer_uri=args.path_to_tensors,
+            tensorizer_dir=args.serialized_directory,
+            encryption_keyfile=keyfile,
+            deserialization_kwargs=args.deserialization_kwargs or {},
+            **credentials
+        )
+
+        merge_extra_config_with_tensorizer_config(extra_config,
+                                                  tensorizer_config)
        deserialize(args, tensorizer_config)
    else:
        raise ValueError("Either serialize or deserialize must be specified.")

--- a/examples/tool_chat_template_deepseekr1.jinja
+++ b/examples/tool_chat_template_deepseekr1.jinja
@@ -11,7 +11,7 @@
            {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
        {%- endif %}
    {%- endif %}
-{%- endfor %}
+{%- endfor -%}

 {#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
 {% if tools is defined and tools is not none %}
@@ -27,8 +27,8 @@
    {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
 {% endif %}

-{{ bos_token }}
-{{ ns.system_prompt }}
+{{- bos_token }}
+{{- ns.system_prompt }}
 {%- for message in messages %}
    {% set content = message['content'] %}
    {%- if message['role'] == 'user' %}
@@ -45,7 +45,7 @@
    {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
        {%- set ns.is_last_user = false -%}
        {%- if ns.is_tool %}
-            {{'<｜tool▁outputs▁end｜>'}}
+            {{- '<｜tool▁outputs▁end｜>'}}
        {%- endif %}
        {%- set ns.is_first = false %}
        {%- set ns.is_tool = false -%}
@@ -53,40 +53,40 @@
        {%- for tool in message['tool_calls'] %}
            {%- if not ns.is_first %}
                {%- if content is none %}
-                    {{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                    {{- '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
                {%- else %}
-                    {{content + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                    {{- content + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
                {%- endif %}
                {%- set ns.is_first = true -%}
            {%- else %}
-                {{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+                {{- '\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
            {%- endif %}
        {%- endfor %}
-        {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+        {{- '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
    {%- endif %}
    {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
        {%- set ns.is_last_user = false -%}
        {%- if ns.is_tool %}
-            {{'<｜tool▁outputs▁end｜>' + content + '<｜end▁of▁sentence｜>'}}
+            {{- '<｜tool▁outputs▁end｜>' + content + '<｜end▁of▁sentence｜>'}}
            {%- set ns.is_tool = false -%}
        {%- else %}
-            {{content + '<｜end▁of▁sentence｜>'}}
+            {{- content + '<｜end▁of▁sentence｜>'}}
        {%- endif %}
    {%- endif %}
    {%- if message['role'] == 'tool' %}
        {%- set ns.is_last_user = false -%}
        {%- set ns.is_tool = true -%}
        {%- if ns.is_output_first %}
-            {{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
+            {{- '<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
            {%- set ns.is_output_first = false %}
        {%- else %}
-            {{'\n<｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
+            {{- '\n<｜tool▁output▁begin｜>' + content + '<｜tool▁output▁end｜>'}}
        {%- endif %}
    {%- endif %}
 {%- endfor -%}
 {% if ns.is_tool %}
-    {{'<｜tool▁outputs▁end｜>'}}
-{% endif %}
+    {{- '<｜tool▁outputs▁end｜>'}}
+{%- endif %}
 {% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
-    {{'<｜Assistant｜>'}}
-{% endif %}
+    {{- '<｜Assistant｜>'}}
+{%- endif %}
\ No newline at end of file
--- a/examples/tool_chat_template_hunyuan_a13b.jinja
+++ b/examples/tool_chat_template_hunyuan_a13b.jinja
+{% set loop_messages = messages %}
+{% if tools %}
+    {% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}
+    {% set weekday_cn = weekday_map[strftime_now('%A')] %}
+    {% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}
+    {% set datetime_str = datetime_str + ' ' + weekday_cn %}
+    {% for message in loop_messages %}
+        {% if 'content' in message %}
+            {% set content = message['content'] %}
+        {% else %}
+            {% set content = '' %}
+        {% endif %}
+        {% if loop.index0 == 0 %}
+            {% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题，你需要进行一个或多个函数/工具调用以实现目的。
+如果没有一个函数可以使用，请直接使用自然语言回复用户，以助手：开头。
+如果给定的问题缺少函数所需的参数，请使用自然语言进行提问，向用户询问必要信息，以助手：开头。
+如果调用结果已经足够回答用户问题，请对历史结果进行总结，使用自然语言回复用户，以助手：开头。
+你应该只在工具调用部分返回函数调用。如果你决定调用任何函数，你必须将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表，格式为JSON。
+' %}
+            {% set content_tmp = content_tmp + '
+' + tools | tojson + '
+' %}
+            {% if message['role'] == 'system' %}
+                {% set content_tmp = content_tmp + '
+额外要求：
+' + content + '
+
+如果你决定返回函数调用，请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>，不得包含其他文本。如果额外要求里有格式要求，请忽略，以此处为准。
+否则，请参考开头说的三种情况，以助手：开头进行回复。
+
+如果额外要求里有时间信息，就以额外要求里的时间为准，否则，参考当前时间：' + datetime_str %}
+                {% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}
+            {% elif message['role'] == 'user' %}
+                {% set content_tmp = content_tmp + '
+如果你决定返回函数调用，请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>，不得包含其他文本。
+否则，请参考开头说的三种情况，以助手：开头进行回复。
+
+当前时间：' + datetime_str %}
+                {% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}
+                {% set content = content_tmp + '用户：' + content + '<|extra_0|>' %}
+            {% endif %}
+        {% else %}
+            {% if message['role'] == 'user' %}
+                {% set content = '用户：' + content + '<|extra_0|>' %}
+            {% elif message['role'] == 'assistant' %}
+                {% if 'tool_calls' in message %}
+                    {% set tool_calls = message['tool_calls'] %}
+                    {% set ns = namespace(tool_calls="[") %}
+                    {% for tool_call in tool_calls %}
+                        {% set function = tool_call['function'] %}
+                        {% set name = function['name'] %}
+                        {% set ns.tool_calls = ns.tool_calls + '{"name": "' + name + '", '%}
+                        {% set arguments = function['arguments'] %}
+                        {% if arguments is not string %}
+                            {% set arguments = arguments | tojson %}
+                        {% endif %}
+                        {% set ns.tool_calls = ns.tool_calls + '"arguments": ' + arguments + '}' %}
+                        {% if not loop.last %}
+                            {% set ns.tool_calls = ns.tool_calls + ', '%}
+                        {% endif %}
+                    {% endfor %}
+                    {% set ns.tool_calls = ns.tool_calls + ']' %}
+                    {% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}
+                {% else %}
+                    {% set content = '助手：' + content %}
+                {% endif %}
+                {% set content = content + '<|eos|>' %}
+            {% elif message['role'] == 'tool' %}
+                {% if content is not string %}
+                    {#- Serialize non-string tool output to JSON so it can be concatenated as text. -#}
+                    {% set content = content | tojson %}
+                {% endif %}
+                {% set content = '<tool_response>' + content + '</tool_response>' %}
+                {% set content = content + '<|extra_0|>' %}
+            {% endif %}
+        {% endif %}
+    {{- content -}}
+    {% endfor %}
+{% else %}
+    {% set context = {'has_head': true} %}
+    {% for message in loop_messages %}
+        {% if 'content' in message %}
+            {% set content = message['content'] %}
+        {% else %}
+            {% set content = '' %}
+        {% endif %}
+        {% if loop.index0 == 0 %}
+            {% if content == '' %}
+                {% set _ = context.update({'has_head': false}) %}
+            {% elif message['role'] == 'system' %}
+                {% set content = '<|startoftext|>' + content + '<|extra_4|>' %}
+            {% endif %}
+        {% endif %}
+        {% if message['role'] == 'user' %}
+            {% if loop.index0 == 1 and not context.has_head %}
+                {% set content = '<|startoftext|>' + content %}
+            {% endif %}
+            {% if loop.index0 == 1 and context.has_head %}
+                {% set content = content + '<|extra_0|>' %}
+            {% else %}
+                {% set content = '<|startoftext|>' + content + '<|extra_0|>' %}
+            {% endif %}
+        {% elif message['role'] == 'assistant' %}
+            {% set content = content + '<|eos|>' %}
+        {% elif message['role'] == 'tool' %}
+            {% set content = content + '<|extra_0|>' %}
+        {% endif %}
+        {{- content -}}
+    {% endfor %}
+{% endif %}
+{%- if enable_thinking is defined and enable_thinking is false %}
+    {{- '<think>\n\n</think>\n' }}
+{%- endif %}
+
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -3,6 +3,7 @@ site_url: https://docs.vllm.ai
 repo_url: https://github.com/vllm-project/vllm
 edit_uri: edit/main/docs/
 exclude_docs: |
+  argparse
  *.inc.md
  *.template.md
 theme:
@@ -47,6 +48,7 @@ theme:
 hooks:
  - docs/mkdocs/hooks/remove_announcement.py
  - docs/mkdocs/hooks/generate_examples.py
+  - docs/mkdocs/hooks/generate_argparse.py
  - docs/mkdocs/hooks/url_schemes.py

 # Required to stop api-autonav from raising an error
@@ -59,6 +61,7 @@ plugins:
  - search
  - autorefs
  - awesome-nav
+  - glightbox
  # For API reference generation
  - api-autonav:
      modules: ["vllm"]

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
    "packaging>=24.2",
    "setuptools>=77.0.3,<80.0.0",
    "setuptools-scm>=8.0",
-    "torch == 2.4.1",
+    "torch == 2.5.1",
    "wheel",
    "jinja2",
 ]
@@ -72,8 +72,6 @@ line-length = 80
 "vllm/core/**/*.py" = ["UP006", "UP035"]
 "vllm/engine/**/*.py" = ["UP006", "UP035"]
 "vllm/executor/**/*.py" = ["UP006", "UP035"]
-"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
-"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
 "vllm/worker/**/*.py" = ["UP006", "UP035"]
 # Python 3.8 typing - skip utils for ROCm
 "vllm/utils/__init__.py" = ["UP006", "UP035"]
@@ -174,3 +172,186 @@ respect-ignore-files = true

 [tool.ty.environment]
 python = "./.venv"
+
+[tool.typos.files]
+# these files may contain non-English words
+extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
+    "benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
+    "vllm/third_party/*"]
+ignore-hidden = true
+ignore-files = true
+ignore-dot = true
+ignore-vcs = true
+ignore-global = true
+ignore-parent = true
+
+[tool.typos.default]
+binary = false
+check-filename = false
+check-file = true
+unicode = true
+ignore-hex = true
+identifier-leading-digits = false
+locale = "en"
+extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
+    ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
+     ".*[Tt]h[rR].*"]
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.default.extend-identifiers]
+bbc5b7ede = "bbc5b7ede"
+womens_doubles = "womens_doubles"
+v_2nd = "v_2nd"
+# splitted_input = "splitted_input"
+NOOPs = "NOOPs"
+typ = "typ"
+nin_shortcut = "nin_shortcut"
+UperNetDecoder = "UperNetDecoder"
+subtile = "subtile"
+cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
+SFOuput = "SFOuput"
+# huggingface transformers repo uses these words
+depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
+DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
+depthwise_seperable_CNN = "depthwise_seperable_CNN"
+
+[tool.typos.default.extend-words]
+iy = "iy"
+tendencias = "tendencias"
+# intel cpu features
+tme = "tme"
+dout = "dout"
+Pn = "Pn"
+arange = "arange"
+
+[tool.typos.type.py]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.py.extend-identifiers]
+arange = "arange"
+NDArray = "NDArray"
+EOFError = "EOFError"
+fo = "fo"
+ba = "ba"
+
+[tool.typos.type.py.extend-words]
+
+[tool.typos.type.cpp]
+extend-glob = ["*.cu"]
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.cpp.extend-identifiers]
+countr_one = "countr_one"
+k_ot = "k_ot"
+ot = "ot"
+
+[tool.typos.type.cpp.extend-words]
+
+[tool.typos.type.rust]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.rust.extend-identifiers]
+flate2 = "flate2"
+
+[tool.typos.type.rust.extend-words]
+ser = "ser"
+
+[tool.typos.type.lock]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.lock.extend-identifiers]
+
+[tool.typos.type.lock.extend-words]
+
+[tool.typos.type.jl]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.jl.extend-identifiers]
+
+[tool.typos.type.jl.extend-words]
+modul = "modul"
+egals = "egals"
+usig = "usig"
+egal = "egal"
+
+[tool.typos.type.go]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.go.extend-identifiers]
+flate = "flate"
+
+[tool.typos.type.go.extend-words]
+
+[tool.typos.type.css]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.css.extend-identifiers]
+nd = "nd"
+
+[tool.typos.type.css.extend-words]
+
+[tool.typos.type.man]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.man.extend-identifiers]
+Nd = "Nd"
+
+[tool.typos.type.man.extend-words]
+
+[tool.typos.type.cert]
+extend-glob = []
+check-file = false
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.cert.extend-identifiers]
+
+[tool.typos.type.cert.extend-words]
+
+[tool.typos.type.sh]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.sh.extend-identifiers]
+ot = "ot"
+
+[tool.typos.type.sh.extend-words]
+
+[tool.typos.type.vimscript]
+extend-glob = []
+extend-ignore-identifiers-re = []
+extend-ignore-words-re = []
+extend-ignore-re = []
+
+[tool.typos.type.vimscript.extend-identifiers]
+windo = "windo"
+
+[tool.typos.type.vimscript.extend-words]
--- a/requirements/build.txt
+++ b/requirements/build.txt
@@ -4,7 +4,7 @@ ninja
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
-torch==2.4.1
+torch==2.5.1
 wheel
 jinja2>=3.1.6
 regex
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -7,13 +7,13 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.51.1
+transformers >= 4.53.2
 huggingface-hub[hf_xet] >= 0.33.0  # Required for Xet downloads.
 tokenizers >= 0.21.1  # Required for fast incremental detokenization.
 protobuf # Required by LlamaTokenizer.
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
 aiohttp
-openai >= 1.52.0, <= 1.90.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support)
+openai >= 1.87.0, <= 1.90.0 # Ensure modern openai package (ensure ResponsePrompt exists in types.responses and max_completion_tokens field support)
 pydantic >= 2.10
 prometheus_client >= 0.18.0
 pillow  # Required for image processing
@@ -21,9 +21,11 @@ prometheus-fastapi-instrumentator >= 7.0.0
 tiktoken >= 0.6.0  # Required for DBRX tokenizer
 lm-format-enforcer >= 0.10.11, < 0.11
 llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
-outlines == 0.1.11
+outlines_core == 0.2.10
+# required for outlines backend disk cache
+diskcache == 5.6.3
 lark == 1.2.2
-xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
+xgrammar == 0.1.21; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
 typing_extensions >= 4.10
 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
 partial-json-parser # used for parsing partial JSON outputs
@@ -31,17 +33,18 @@ pyzmq >= 25.0.0
 msgspec
 gguf >= 0.13.0
 importlib_metadata; python_version < '3.10'
-mistral_common[opencv] >= 1.5.4 # requires numpy>=1.25 #1.6.2
+mistral_common[image,audio] >= 1.5.4 # requires numpy>=1.25 #1.8.2
 opencv-python-headless >= 4.11.0    # required for video IO
 pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
 einops # Required for Qwen2-VL.
 compressed-tensors == 0.10.2 # required for compressed-tensors
-depyf==0.18.0 # required for profiling and debugging with compilation config
+depyf==0.19.0 # required for profiling and debugging with compilation config
 cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
 watchfiles # required for http server to monitor the updates of TLS files
 python-json-logger # Used by logging as per examples/others/logging_configuration.md
 scipy # Required for phi-4-multimodal-instruct
 ninja # Required for xgrammar, rocm, tpu, xpu
 pybase64 # fast base64 implementation
+cbor2 # Required for cross-language serialization of hashable objects
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -24,6 +24,4 @@ datasets # for benchmark scripts
 # Intel Extension for PyTorch, only for x86_64 CPUs
 intel-openmp==2024.2.1; platform_machine == "x86_64"
 intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
-py-libnuma; platform_system != "Darwin"
-psutil; platform_system != "Darwin"
 triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9'

 # Dependencies for NVIDIA GPUs
 ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch==2.7.0
-torchaudio==2.7.0
+torch==2.7.1
+torchaudio==2.7.1
 # These must be updated alongside torch
-torchvision==0.22.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-# https://github.com/facebookresearch/xformers/releases/tag/v0.0.30
-xformers==0.0.30; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
+torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
+xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -4,6 +4,24 @@ mkdocs-material
 mkdocstrings-python
 mkdocs-gen-files
 mkdocs-awesome-nav
+mkdocs-glightbox
 python-markdown-math
 regex
 ruff
+
+# Required for argparse hook only
+-f https://download.pytorch.org/whl/cpu
+cachetools
+cbor2
+cloudpickle
+fastapi
+msgspec
+openai
+partial-json-parser
+pillow
+psutil
+pybase64
+pydantic
+torch
+transformers
+zmq
--- a/requirements/hpu.txt
+++ b/requirements/hpu.txt
-# Common dependencies
-r common.txt
-
-# Dependencies for HPU code
-ray
-triton==3.1.0
-pandas
-numpy==1.26.4
-tabulate
-setuptools>=77.0.3,<80.0.0
-setuptools-scm>=8
-vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@f1f6624
--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
 # testing
 pytest
-tensorizer>=2.9.0
+tensorizer==2.10.1
 pytest-forked
 pytest-asyncio
 pytest-rerunfailures
@@ -23,7 +23,7 @@ jiwer # required for audio tests
 timm # required for internvl test
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[opencv] >= 1.6.2 # required for pixtral test
+mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test
 opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test

--- a/requirements/rocm.txt
+++ b/requirements/rocm.txt
@@ -11,12 +11,17 @@ datasets
 ray>=2.10.0,<2.45.0
 peft
 pytest-asyncio
-tensorizer>=2.9.0
+tensorizer==2.10.1
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
 runai-model-streamer==0.11.0
 runai-model-streamer-s3==0.11.0
+conch-triton-kernels==1.2.1
+
+numa
+python-multipart
+pytrie
 setuptools_scm>=8
 cmake==3.29

@@ -25,7 +30,4 @@ triton == 3.0.0
 flash_attn == 2.6.1
 flash_mla == 1.0.0
 lmslim == 0.3.0
-numa
-python-multipart
-pytrie

--- a/requirements/test.in
+++ b/requirements/test.in
 # testing
 pytest
-tensorizer>=2.9.0
+tensorizer==2.10.1
 pytest-forked
 pytest-asyncio
 pytest-rerunfailures
@@ -22,19 +22,20 @@ sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 timm # required for internvl test
-torch==2.7.0
-torchaudio==2.7.0
-torchvision==0.22.0
+torch==2.7.1
+torchaudio==2.7.1
+torchvision==0.22.1
 transformers_stream_generator # required for qwen-vl test
 mamba_ssm # required for plamo2 test
 matplotlib # required for qwen-vl test
-mistral_common[opencv] >= 1.6.2 # required for pixtral test
+mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test
+open_clip_torch==2.32.0 # Required for nemotron_vl test
 opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.8 # required for model evaluation test
 mteb[bm25s]>=1.38.11, <2 # required for mteb test
-transformers==4.52.4
+transformers==4.53.2
 tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.33.0  # Required for Xet downloads.
 schemathesis>=3.39.15 # Required for openai schema test.
@@ -53,3 +54,4 @@ runai-model-streamer==0.11.0
 runai-model-streamer-s3==0.11.0
 fastsafetensors>=0.1.10
 pydantic>=2.10 # 2.9 leads to error on python 3.10
+terratorch==1.1rc2 # required for PrithviMAE test
\ No newline at end of file
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -6,6 +6,10 @@ accelerate==1.0.1
    # via
    #   lm-eval
    #   peft
+aenum==3.1.16
+    # via lightly
+affine==2.4.0
+    # via rasterio
 aiohappyeyeballs==2.4.3
    # via aiohttp
 aiohttp==3.10.11
@@ -21,8 +25,18 @@ aiosignal==1.3.1
    # via
    #   aiohttp
    #   ray
+albucore==0.0.16
+    # via terratorch
+albumentations==1.4.6
+    # via terratorch
+alembic==1.16.4
+    # via mlflow
 annotated-types==0.7.0
    # via pydantic
+antlr4-python3-runtime==4.9.3
+    # via
+    #   hydra-core
+    #   omegaconf
 anyio==4.6.2.post1
    # via
    #   httpx
@@ -34,10 +48,12 @@ arrow==1.3.0
 attrs==24.2.0
    # via
    #   aiohttp
+    #   fiona
    #   hypothesis
    #   jsonlines
    #   jsonschema
    #   pytest-subtests
+    #   rasterio
    #   referencing
 audioread==3.0.1
    # via librosa
@@ -46,9 +62,13 @@ backoff==2.2.1
    #   -r requirements/test.in
    #   schemathesis
 bitsandbytes==0.46.1
-    # via -r requirements/test.in
+    # via
+    #   -r requirements/test.in
+    #   lightning
 black==24.10.0
    # via datamodel-code-generator
+blinker==1.9.0
+    # via flask
 blobfile==3.0.0
    # via -r requirements/test.in
 bm25s==0.2.13
@@ -64,11 +84,18 @@ bounded-pool-executor==0.0.3
 buildkite-test-collector==0.1.9
    # via -r requirements/test.in
 cachetools==5.5.2
-    # via google-auth
+    # via
+    #   google-auth
+    #   mlflow-skinny
 certifi==2024.8.30
    # via
+    #   fiona
    #   httpcore
    #   httpx
+    #   lightly
+    #   pyogrio
+    #   pyproj
+    #   rasterio
    #   requests
 cffi==1.17.1
    # via soundfile
@@ -79,11 +106,28 @@ charset-normalizer==3.4.0
 click==8.1.7
    # via
    #   black
+    #   click-plugins
+    #   cligj
+    #   fiona
+    #   flask
    #   jiwer
+    #   mlflow-skinny
    #   nltk
+    #   rasterio
    #   ray
    #   schemathesis
    #   typer
+    #   uvicorn
+click-plugins==1.1.1.2
+    # via
+    #   fiona
+    #   rasterio
+cligj==0.7.2
+    # via
+    #   fiona
+    #   rasterio
+cloudpickle==3.1.1
+    # via mlflow-skinny
 colorama==0.4.6
    # via
    #   sacrebleu
@@ -99,6 +143,8 @@ cupy-cuda12x==13.3.0
    # via ray
 cycler==0.12.1
    # via matplotlib
+databricks-sdk==0.59.0
+    # via mlflow-skinny
 datamodel-code-generator==0.26.3
    # via -r requirements/test.in
 dataproperty==1.0.1
@@ -122,13 +168,21 @@ distlib==0.3.9
    # via virtualenv
 dnspython==2.7.0
    # via email-validator
+docker==7.1.0
+    # via mlflow
 docopt==0.6.2
    # via num2words
-einops==0.8.0
+docstring-parser==0.17.0
+    # via jsonargparse
+efficientnet-pytorch==0.7.1
+    # via segmentation-models-pytorch
+einops==0.8.1
    # via
    #   -r requirements/test.in
    #   encodec
    #   mamba-ssm
+    #   terratorch
+    #   torchgeo
    #   vector-quantize-pytorch
    #   vocos
 einx==0.3.0
@@ -141,6 +195,8 @@ eval-type-backport==0.2.2
    # via mteb
 evaluate==0.4.3
    # via lm-eval
+fastapi==0.116.1
+    # via mlflow-skinny
 fastparquet==2024.11.0
    # via genai-perf
 fastrlock==0.8.2
@@ -156,6 +212,10 @@ filelock==3.16.1
    #   torch
    #   transformers
    #   virtualenv
+fiona==1.10.1
+    # via torchgeo
+flask==3.1.1
+    # via mlflow
 fonttools==4.54.1
    # via matplotlib
 fqdn==1.5.1
@@ -173,23 +233,50 @@ fsspec==2024.9.0
    #   evaluate
    #   fastparquet
    #   huggingface-hub
+    #   lightning
+    #   pytorch-lightning
    #   torch
+ftfy==6.3.1
+    # via open-clip-torch
 genai-perf==0.0.8
    # via -r requirements/test.in
 genson==1.3.0
    # via datamodel-code-generator
+geopandas==1.0.1
+    # via terratorch
+gitdb==4.0.12
+    # via gitpython
+gitpython==3.1.44
+    # via mlflow-skinny
 google-api-core==2.24.2
    # via opencensus
 google-auth==2.40.2
-    # via google-api-core
+    # via
+    #   databricks-sdk
+    #   google-api-core
 googleapis-common-protos==1.70.0
    # via google-api-core
+graphene==3.4.3
+    # via mlflow
 graphql-core==3.2.6
-    # via hypothesis-graphql
+    # via
+    #   graphene
+    #   graphql-relay
+    #   hypothesis-graphql
+graphql-relay==3.2.0
+    # via graphene
+greenlet==3.2.3
+    # via sqlalchemy
 grpcio==1.71.0
    # via ray
+gunicorn==23.0.0
+    # via mlflow
 h11==0.14.0
-    # via httpcore
+    # via
+    #   httpcore
+    #   uvicorn
+h5py==3.13.0
+    # via terratorch
 harfile==0.3.0
    # via schemathesis
 hf-xet==1.1.3
@@ -202,20 +289,27 @@ httpx==0.27.2
    # via
    #   -r requirements/test.in
    #   schemathesis
-huggingface-hub==0.33.0
+huggingface-hub==0.33.1
    # via
    #   -r requirements/test.in
    #   accelerate
    #   datasets
    #   evaluate
+    #   open-clip-torch
    #   peft
+    #   segmentation-models-pytorch
    #   sentence-transformers
+    #   terratorch
    #   timm
    #   tokenizers
    #   transformers
    #   vocos
 humanize==4.11.0
    # via runai-model-streamer
+hydra-core==1.3.2
+    # via
+    #   lightly
+    #   lightning
 hypothesis==6.131.0
    # via
    #   hypothesis-graphql
@@ -233,6 +327,14 @@ idna==3.10
    #   jsonschema
    #   requests
    #   yarl
+imageio==2.37.0
+    # via scikit-image
+importlib-metadata==8.7.0
+    # via
+    #   mlflow-skinny
+    #   opentelemetry-api
+importlib-resources==6.5.2
+    # via typeshed-client
 inflect==5.6.2
    # via datamodel-code-generator
 iniconfig==2.0.0
@@ -241,9 +343,13 @@ isoduration==20.11.0
    # via jsonschema
 isort==5.13.2
    # via datamodel-code-generator
+itsdangerous==2.2.0
+    # via flask
 jinja2==3.1.6
    # via
    #   datamodel-code-generator
+    #   flask
+    #   mlflow
    #   torch
 jiwer==3.0.5
    # via -r requirements/test.in
@@ -256,6 +362,10 @@ joblib==1.4.2
    #   librosa
    #   nltk
    #   scikit-learn
+jsonargparse==4.35.0
+    # via
+    #   lightning
+    #   terratorch
 jsonlines==4.0.0
    # via lm-eval
 jsonpointer==3.0.0
@@ -274,12 +384,33 @@ kaleido==0.2.1
    # via genai-perf
 kiwisolver==1.4.7
    # via matplotlib
+kornia==0.8.1
+    # via torchgeo
+kornia-rs==0.1.9
+    # via kornia
 lazy-loader==0.4
-    # via librosa
+    # via
+    #   librosa
+    #   scikit-image
 libnacl==2.1.0
    # via tensorizer
 librosa==0.10.2.post1
    # via -r requirements/test.in
+lightly==1.5.20
+    # via
+    #   terratorch
+    #   torchgeo
+lightly-utils==0.0.2
+    # via lightly
+lightning==2.5.1.post0
+    # via
+    #   terratorch
+    #   torchgeo
+lightning-utilities==0.14.3
+    # via
+    #   lightning
+    #   pytorch-lightning
+    #   torchmetrics
 llvmlite==0.44.0
    # via numba
 lm-eval==0.4.8
@@ -288,16 +419,27 @@ lxml==5.3.0
    # via
    #   blobfile
    #   sacrebleu
+mako==1.3.10
+    # via alembic
 mamba-ssm==2.2.4
    # via -r requirements/test.in
+markdown==3.8.2
+    # via mlflow
 markdown-it-py==3.0.0
    # via rich
 markupsafe==3.0.1
    # via
+    #   flask
    #   jinja2
+    #   mako
    #   werkzeug
 matplotlib==3.9.2
-    # via -r requirements/test.in
+    # via
+    #   -r requirements/test.in
+    #   lightning
+    #   mlflow
+    #   pycocotools
+    #   torchgeo
 mbstrdecoder==1.1.3
    # via
    #   dataproperty
@@ -305,8 +447,12 @@ mbstrdecoder==1.1.3
    #   typepy
 mdurl==0.1.2
    # via markdown-it-py
-mistral-common==1.6.2
+mistral-common==1.8.2
    # via -r requirements/test.in
+mlflow==2.22.0
+    # via terratorch
+mlflow-skinny==2.22.0
+    # via mlflow
 more-itertools==10.5.0
    # via lm-eval
 mpmath==1.3.0
@@ -325,10 +471,14 @@ multiprocess==0.70.16
    # via
    #   datasets
    #   evaluate
+munch==4.0.0
+    # via pretrainedmodels
 mypy-extensions==1.0.0
    # via black
 networkx==3.2.1
-    # via torch
+    # via
+    #   scikit-image
+    #   torch
 ninja==1.11.1.3
    # via mamba-ssm
 nltk==3.9.1
@@ -345,6 +495,8 @@ numpy==1.26.4
    # via
    #   -r requirements/test.in
    #   accelerate
+    #   albucore
+    #   albumentations
    #   bitsandbytes
    #   bm25s
    #   contourpy
@@ -355,9 +507,15 @@ numpy==1.26.4
    #   evaluate
    #   fastparquet
    #   genai-perf
+    #   geopandas
+    #   h5py
+    #   imageio
    #   librosa
+    #   lightly
+    #   lightly-utils
    #   matplotlib
    #   mistral-common
+    #   mlflow
    #   mteb
    #   numba
    #   numexpr
@@ -365,18 +523,30 @@ numpy==1.26.4
    #   pandas
    #   patsy
    #   peft
+    #   pycocotools
+    #   pyogrio
+    #   rasterio
+    #   rioxarray
    #   rouge-score
    #   runai-model-streamer
    #   sacrebleu
+    #   scikit-image
    #   scikit-learn
    #   scipy
+    #   segmentation-models-pytorch
+    #   shapely
    #   soxr
    #   statsmodels
+    #   tensorboardx
    #   tensorizer
+    #   tifffile
+    #   torchgeo
+    #   torchmetrics
    #   torchvision
    #   transformers
    #   tritonclient
    #   vocos
+    #   xarray
 nvidia-cublas-cu12==12.8.3.14
    # via
    #   nvidia-cudnn-cu12
@@ -414,6 +584,12 @@ nvidia-nvjitlink-cu12==12.8.61
    #   torch
 nvidia-nvtx-cu12==12.8.55
    # via torch
+omegaconf==2.3.0
+    # via
+    #   hydra-core
+    #   lightning
+open-clip-torch==2.32.0
+    # via -r requirements/test.in
 opencensus==0.11.4
    # via ray
 opencensus-context==0.1.3
@@ -421,7 +597,18 @@ opencensus-context==0.1.3
 opencv-python-headless==4.11.0.86
    # via
    #   -r requirements/test.in
+    #   albucore
+    #   albumentations
    #   mistral-common
+opentelemetry-api==1.35.0
+    # via
+    #   mlflow-skinny
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-sdk==1.35.0
+    # via mlflow-skinny
+opentelemetry-semantic-conventions==0.56b0
+    # via opentelemetry-sdk
 packaging==24.2
    # via
    #   accelerate
@@ -430,26 +617,44 @@ packaging==24.2
    #   datasets
    #   evaluate
    #   fastparquet
+    #   geopandas
+    #   gunicorn
    #   huggingface-hub
+    #   hydra-core
+    #   kornia
    #   lazy-loader
+    #   lightning
+    #   lightning-utilities
    #   mamba-ssm
    #   matplotlib
+    #   mlflow-skinny
    #   peft
    #   plotly
    #   pooch
+    #   pyogrio
    #   pytest
    #   pytest-rerunfailures
+    #   pytorch-lightning
    #   ray
+    #   rioxarray
+    #   scikit-image
    #   statsmodels
+    #   tensorboardx
+    #   torchmetrics
    #   transformers
    #   typepy
+    #   xarray
 pandas==2.2.3
    # via
    #   datasets
    #   evaluate
    #   fastparquet
    #   genai-perf
+    #   geopandas
+    #   mlflow
    #   statsmodels
+    #   torchgeo
+    #   xarray
 pathspec==0.12.1
    # via black
 pathvalidate==3.2.1
@@ -463,9 +668,14 @@ peft==0.13.2
 pillow==10.4.0
    # via
    #   genai-perf
+    #   imageio
+    #   lightly-utils
    #   matplotlib
    #   mistral-common
+    #   scikit-image
+    #   segmentation-models-pytorch
    #   sentence-transformers
+    #   torchgeo
    #   torchvision
 platformdirs==4.3.6
    # via
@@ -484,6 +694,8 @@ portalocker==2.10.1
    # via sacrebleu
 pqdm==0.2.0
    # via -r requirements/test.in
+pretrainedmodels==0.7.4
+    # via segmentation-models-pytorch
 prometheus-client==0.22.0
    # via ray
 propcache==0.2.0
@@ -494,8 +706,10 @@ protobuf==5.28.3
    # via
    #   google-api-core
    #   googleapis-common-protos
+    #   mlflow-skinny
    #   proto-plus
    #   ray
+    #   tensorboardx
    #   tensorizer
 psutil==6.1.0
    # via
@@ -510,6 +724,7 @@ pyarrow==18.0.0
    # via
    #   datasets
    #   genai-perf
+    #   mlflow
 pyasn1==0.6.1
    # via
    #   pyasn1-modules
@@ -518,6 +733,10 @@ pyasn1-modules==0.4.2
    # via google-auth
 pybind11==2.13.6
    # via lm-eval
+pycocotools==2.0.8
+    # via terratorch
+pycountry==24.6.1
+    # via pydantic-extra-types
 pycparser==2.22
    # via cffi
 pycryptodomex==3.22.0
@@ -525,23 +744,39 @@ pycryptodomex==3.22.0
 pydantic==2.11.5
    # via
    #   -r requirements/test.in
+    #   albumentations
    #   datamodel-code-generator
+    #   fastapi
+    #   lightly
    #   mistral-common
+    #   mlflow-skinny
    #   mteb
+    #   pydantic-extra-types
    #   ray
 pydantic-core==2.33.2
    # via pydantic
+pydantic-extra-types==2.10.5
+    # via mistral-common
 pygments==2.18.0
    # via rich
+pyogrio==0.11.0
+    # via geopandas
 pyparsing==3.2.0
-    # via matplotlib
+    # via
+    #   matplotlib
+    #   rasterio
+pyproj==3.7.1
+    # via
+    #   geopandas
+    #   rioxarray
+    #   torchgeo
 pyrate-limiter==3.7.0
    # via schemathesis
 pystemmer==3.0.0
    # via mteb
 pytablewriter==1.2.0
    # via lm-eval
-pytest==8.3.3
+pytest==8.3.5
    # via
    #   -r requirements/test.in
    #   buildkite-test-collector
@@ -554,6 +789,7 @@ pytest==8.3.3
    #   pytest-subtests
    #   pytest-timeout
    #   schemathesis
+    #   terratorch
 pytest-asyncio==0.24.0
    # via -r requirements/test.in
 pytest-forked==1.6.0
@@ -568,15 +804,23 @@ pytest-subtests==0.14.1
    # via schemathesis
 pytest-timeout==2.3.1
    # via -r requirements/test.in
+python-box==7.3.2
+    # via terratorch
 python-dateutil==2.9.0.post0
    # via
    #   arrow
    #   botocore
+    #   graphene
+    #   lightly
    #   matplotlib
    #   pandas
    #   typepy
 python-rapidjson==1.20
    # via tritonclient
+pytorch-lightning==2.5.2
+    # via
+    #   lightly
+    #   lightning
 pytrec-eval-terrier==0.5.7
    # via mteb
 pytz==2024.2
@@ -586,11 +830,17 @@ pytz==2024.2
 pyyaml==6.0.2
    # via
    #   accelerate
+    #   albumentations
    #   datamodel-code-generator
    #   datasets
    #   genai-perf
    #   huggingface-hub
+    #   jsonargparse
+    #   lightning
+    #   mlflow-skinny
+    #   omegaconf
    #   peft
+    #   pytorch-lightning
    #   ray
    #   responses
    #   schemathesis
@@ -599,6 +849,11 @@ pyyaml==6.0.2
    #   vocos
 rapidfuzz==3.12.1
    # via jiwer
+rasterio==1.4.3
+    # via
+    #   rioxarray
+    #   terratorch
+    #   torchgeo
 ray==2.43.0
    # via -r requirements/test.in
 redis==5.2.0
@@ -610,18 +865,23 @@ referencing==0.35.1
 regex==2024.9.11
    # via
    #   nltk
+    #   open-clip-torch
    #   sacrebleu
    #   tiktoken
    #   transformers
 requests==2.32.3
    # via
    #   buildkite-test-collector
+    #   databricks-sdk
    #   datasets
+    #   docker
    #   evaluate
    #   google-api-core
    #   huggingface-hub
+    #   lightly
    #   lm-eval
    #   mistral-common
+    #   mlflow-skinny
    #   mteb
    #   pooch
    #   ray
@@ -639,8 +899,11 @@ rfc3987==1.3.8
 rich==13.9.4
    # via
    #   genai-perf
+    #   lightning
    #   mteb
    #   typer
+rioxarray==0.19.0
+    # via terratorch
 rouge-score==0.1.2
    # via lm-eval
 rpds-py==0.20.1
@@ -649,6 +912,8 @@ rpds-py==0.20.1
    #   referencing
 rsa==4.9.1
    # via google-auth
+rtree==1.4.0
+    # via torchgeo
 runai-model-streamer==0.11.0
    # via -r requirements/test.in
 runai-model-streamer-s3==0.11.0
@@ -660,26 +925,38 @@ sacrebleu==2.4.3
 safetensors==0.4.5
    # via
    #   accelerate
+    #   open-clip-torch
    #   peft
    #   timm
    #   transformers
 schemathesis==3.39.15
    # via -r requirements/test.in
+scikit-image==0.25.2
+    # via albumentations
 scikit-learn==1.5.2
    # via
+    #   albumentations
    #   librosa
    #   lm-eval
+    #   mlflow
    #   mteb
    #   sentence-transformers
 scipy==1.13.1
    # via
+    #   albumentations
    #   bm25s
    #   librosa
+    #   mlflow
    #   mteb
+    #   scikit-image
    #   scikit-learn
    #   sentence-transformers
    #   statsmodels
    #   vocos
+segmentation-models-pytorch==0.4.0
+    # via
+    #   terratorch
+    #   torchgeo
 sentence-transformers==3.2.1
    # via
    #   -r requirements/test.in
@@ -688,21 +965,30 @@ sentencepiece==0.2.0
    # via mistral-common
 setuptools==77.0.3
    # via
+    #   lightning-utilities
    #   mamba-ssm
    #   pytablewriter
    #   torch
    #   triton
+shapely==2.1.1
+    # via
+    #   geopandas
+    #   torchgeo
 shellingham==1.5.4
    # via typer
 six==1.16.0
    # via
    #   junit-xml
+    #   lightly
    #   opencensus
    #   python-dateutil
    #   rfc3339-validator
    #   rouge-score
+    #   segmentation-models-pytorch
 smart-open==7.1.0
    # via ray
+smmap==5.0.2
+    # via gitdb
 sniffio==1.3.1
    # via
    #   anyio
@@ -713,12 +999,22 @@ soundfile==0.12.1
    # via
    #   -r requirements/test.in
    #   librosa
+    #   mistral-common
 soxr==0.5.0.post1
-    # via librosa
+    # via
+    #   librosa
+    #   mistral-common
+sqlalchemy==2.0.41
+    # via
+    #   alembic
+    #   mlflow
 sqlitedict==2.1.0
    # via lm-eval
+sqlparse==0.5.3
+    # via mlflow-skinny
 starlette==0.46.2
    # via
+    #   fastapi
    #   schemathesis
    #   starlette-testclient
 starlette-testclient==0.4.1
@@ -739,16 +1035,29 @@ tenacity==9.0.0
    # via
    #   lm-eval
    #   plotly
-tensorizer==2.9.0
+tensorboardx==2.6.4
+    # via lightning
+tensorizer==2.10.1
+    # via -r requirements/test.in
+terratorch==1.1rc2
    # via -r requirements/test.in
 threadpoolctl==3.5.0
    # via scikit-learn
+tifffile==2025.3.30
+    # via
+    #   scikit-image
+    #   terratorch
 tiktoken==0.7.0
    # via
    #   lm-eval
    #   mistral-common
-timm==1.0.11
-    # via -r requirements/test.in
+timm==1.0.15
+    # via
+    #   -r requirements/test.in
+    #   open-clip-torch
+    #   segmentation-models-pytorch
+    #   terratorch
+    #   torchgeo
 tokenizers==0.21.1
    # via
    #   -r requirements/test.in
@@ -757,50 +1066,81 @@ tomli==2.2.1
    # via schemathesis
 tomli-w==1.2.0
    # via schemathesis
-torch==2.7.0+cu128
+torch==2.7.1+cu128
    # via
    #   -r requirements/test.in
    #   accelerate
    #   bitsandbytes
+    #   efficientnet-pytorch
    #   encodec
    #   fastsafetensors
+    #   kornia
+    #   lightly
+    #   lightning
    #   lm-eval
    #   mamba-ssm
    #   mteb
+    #   open-clip-torch
    #   peft
+    #   pretrainedmodels
+    #   pytorch-lightning
    #   runai-model-streamer
+    #   segmentation-models-pytorch
    #   sentence-transformers
    #   tensorizer
+    #   terratorch
    #   timm
    #   torchaudio
+    #   torchgeo
+    #   torchmetrics
    #   torchvision
    #   vector-quantize-pytorch
    #   vocos
-torchaudio==2.7.0+cu128
+torchaudio==2.7.1+cu128
    # via
    #   -r requirements/test.in
    #   encodec
    #   vocos
-torchvision==0.22.0+cu128
+torchgeo==0.7.0
+    # via terratorch
+torchmetrics==1.7.4
+    # via
+    #   lightning
+    #   pytorch-lightning
+    #   terratorch
+    #   torchgeo
+torchvision==0.22.1+cu128
    # via
    #   -r requirements/test.in
+    #   lightly
+    #   open-clip-torch
+    #   pretrainedmodels
+    #   segmentation-models-pytorch
+    #   terratorch
    #   timm
+    #   torchgeo
 tqdm==4.66.6
    # via
    #   datasets
    #   evaluate
    #   huggingface-hub
+    #   lightly
+    #   lightning
    #   lm-eval
    #   mteb
    #   nltk
+    #   open-clip-torch
    #   peft
    #   pqdm
+    #   pretrainedmodels
+    #   pytorch-lightning
+    #   segmentation-models-pytorch
    #   sentence-transformers
    #   tqdm-multiprocess
    #   transformers
 tqdm-multiprocess==0.0.11
    # via lm-eval
-transformers==4.52.4
+transformers==4.53.2
    # via
    #   -r requirements/test.in
    #   genai-perf
@@ -811,7 +1151,7 @@ transformers==4.52.4
    #   transformers-stream-generator
 transformers-stream-generator==0.0.5
    # via -r requirements/test.in
-triton==3.3.0
+triton==3.3.1
    # via torch
 tritonclient==2.51.0
    # via
@@ -826,17 +1166,34 @@ typer==0.15.2
    # via fastsafetensors
 types-python-dateutil==2.9.0.20241206
    # via arrow
+typeshed-client==2.8.2
+    # via jsonargparse
 typing-extensions==4.12.2
    # via
+    #   albumentations
+    #   alembic
+    #   fastapi
+    #   graphene
    #   huggingface-hub
    #   librosa
+    #   lightning
+    #   lightning-utilities
    #   mistral-common
+    #   mlflow-skinny
    #   mteb
+    #   opentelemetry-api
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
    #   pqdm
    #   pydantic
    #   pydantic-core
+    #   pydantic-extra-types
+    #   pytorch-lightning
+    #   sqlalchemy
    #   torch
+    #   torchgeo
    #   typer
+    #   typeshed-client
    #   typing-inspection
 typing-inspection==0.4.1
    # via pydantic
@@ -848,23 +1205,33 @@ urllib3==2.2.3
    # via
    #   blobfile
    #   botocore
+    #   docker
+    #   lightly
    #   requests
    #   responses
    #   tritonclient
+uvicorn==0.35.0
+    # via mlflow-skinny
 vector-quantize-pytorch==1.21.2
    # via -r requirements/test.in
 virtualenv==20.31.2
    # via ray
 vocos==0.1.0
    # via -r requirements/test.in
+wcwidth==0.2.13
+    # via ftfy
 webcolors==24.11.1
    # via jsonschema
 werkzeug==3.1.3
-    # via schemathesis
+    # via
+    #   flask
+    #   schemathesis
 word2number==1.1
    # via lm-eval
 wrapt==1.17.2
    # via smart-open
+xarray==2025.7.1
+    # via rioxarray
 xxhash==3.5.0
    # via
    #   datasets
@@ -873,5 +1240,7 @@ yarl==1.17.1
    # via
    #   aiohttp
    #   schemathesis
+zipp==3.23.0
+    # via importlib-metadata
 zstandard==0.23.0
    # via lm-eval