Commit 711aa9d5 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.10.0' into v0.10.0-dev

parents 751c492c 6d8d0a24
#!/bin/bash
#
# Start a vLLM server with data parallelism (Ray backend), expert parallelism
# and expert-parallel load balancing (EPLB) enabled.
#
# Options:
#   --dp SIZE           data parallel size            (default: 4)
#   --re SIZE           number of redundant experts   (default: 0)
#   --host HOST         address to bind               (default: 0.0.0.0)
#   --port PORT         port to listen on             (default: 8006)
#   --model MODEL_NAME  model name or path
#   --local-model       use LOCAL_MODEL_PATH as the model
#   -h, --help          print usage and exit

HOST="0.0.0.0"
PORT=8006
DATA_PARALLEL_SIZE=4
REDUNDANT_EXPERTS=0
LOCAL_MODEL_PATH="/models/models--deepseek-ai--DeepSeek-V2-Lite/snapshots/604d5664dddd88a0433dbae533b7fe9472482de0"
MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite"

# Print the usage text to stdout.
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " --dp SIZE Set data parallel size (default: 4)"
  echo " --re SIZE Set redundant experts (default: 0)"
  echo " --host HOST Set host address (default: 0.0.0.0)"
  echo " --port PORT Set port number (default: 8006)"
  echo " --model MODEL_NAME Set model name or path"
  echo " --local-model Use the local model path ($LOCAL_MODEL_PATH)"
  echo " -h, --help Show this help message"
}

# Abort unless the option in $1 is followed by a value.
# Arguments: the remaining command-line words, option first.
# Without this check `shift 2` fails on a trailing option, shifts nothing,
# and the parsing loop would spin forever on the same argument.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option $1 requires an argument" >&2
    echo "Use -h or --help for usage information" >&2
    exit 1
  fi
}

# Parse command-line options into the global configuration variables
# (HOST, PORT, DATA_PARALLEL_SIZE, REDUNDANT_EXPERTS, MODEL_NAME).
# Exits 0 after printing help, exits 1 on an unknown option or missing value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dp)
        require_value "$@"
        DATA_PARALLEL_SIZE="$2"
        shift 2
        ;;
      --re)
        require_value "$@"
        REDUNDANT_EXPERTS="$2"
        shift 2
        ;;
      --host)
        require_value "$@"
        HOST="$2"
        shift 2
        ;;
      --port)
        require_value "$@"
        PORT="$2"
        shift 2
        ;;
      --model)
        require_value "$@"
        MODEL_NAME="$2"
        shift 2
        ;;
      --local-model)
        MODEL_NAME="$LOCAL_MODEL_PATH"
        shift
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        echo "Use -h or --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

# Parse the options, export the vLLM/Ray environment and launch the server.
main() {
  parse_args "$@"

  echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALLEL_SIZE and redundant experts: $REDUNDANT_EXPERTS"

  export RAY_DEDUP_LOGS=0
  export VLLM_USE_V1=1
  export VLLM_ALL2ALL_BACKEND="pplx"
  export VLLM_USE_DEEP_GEMM=1

  # Every expansion is quoted so that model paths containing spaces or glob
  # characters reach vLLM as a single argument.
  vllm serve "$MODEL_NAME" \
    --data-parallel-size "$DATA_PARALLEL_SIZE" \
    --data-parallel-size-local "$DATA_PARALLEL_SIZE" \
    --data-parallel-backend ray \
    --enforce-eager \
    --enable-expert-parallel \
    --enable-eplb \
    --num-redundant-experts "$REDUNDANT_EXPERTS" \
    --trust-remote-code \
    --host "$HOST" \
    --port "$PORT"
}

main "$@"
#!/bin/bash #!/bin/bash
#
# Helper script to manually start or join a Ray cluster for online serving of vLLM models.
# This script is first executed on the head node, and then on each worker node with the IP address
# of the head node.
#
# Subcommands:
# leader: Launches a Ray head node and blocks until the cluster reaches the expected size (head + workers).
# worker: Starts a worker node that connects to an existing Ray head node.
#
# Example usage:
# On the head node machine, start the Ray head node process and run a vLLM server.
# ./multi-node-serving.sh leader --ray_port=6379 --ray_cluster_size=<SIZE> [<extra ray args>] && \
# python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Meta-Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline_parallel_size 2
#
# On each worker node, start the Ray worker node process.
# ./multi-node-serving.sh worker --ray_address=<HEAD_NODE_IP> --ray_port=6379 [<extra ray args>]
#
# About Ray:
# Ray is an open-source distributed execution framework that simplifies
# distributed computing. Learn more:
# https://ray.io/
subcommand=$1
shift
ray_port=6379 subcommand=$1 # Either "leader" or "worker".
ray_init_timeout=300 shift # Remove the subcommand from the argument list.
declare -a start_params
ray_port=6379 # Port used by the Ray head node.
ray_init_timeout=300 # Seconds to wait before timing out.
declare -a start_params # Parameters forwarded to the underlying 'ray start' command.
# Handle the worker subcommand.
case "$subcommand" in case "$subcommand" in
worker) worker)
ray_address="" ray_address=""
...@@ -32,6 +55,7 @@ case "$subcommand" in ...@@ -32,6 +55,7 @@ case "$subcommand" in
exit 1 exit 1
fi fi
# Retry until the worker node connects to the head node or the timeout expires.
for (( i=0; i < $ray_init_timeout; i+=5 )); do for (( i=0; i < $ray_init_timeout; i+=5 )); do
ray start --address=$ray_address:$ray_port --block "${start_params[@]}" ray start --address=$ray_address:$ray_port --block "${start_params[@]}"
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
...@@ -45,6 +69,7 @@ case "$subcommand" in ...@@ -45,6 +69,7 @@ case "$subcommand" in
exit 1 exit 1
;; ;;
# Handle the leader subcommand.
leader) leader)
ray_cluster_size="" ray_cluster_size=""
while [ $# -gt 0 ]; do while [ $# -gt 0 ]; do
...@@ -69,10 +94,10 @@ case "$subcommand" in ...@@ -69,10 +94,10 @@ case "$subcommand" in
exit 1 exit 1
fi fi
# start the ray daemon # Start the Ray head node.
ray start --head --port=$ray_port "${start_params[@]}" ray start --head --port=$ray_port "${start_params[@]}"
# wait until all workers are active # Poll Ray until every worker node is active.
for (( i=0; i < $ray_init_timeout; i+=5 )); do for (( i=0; i < $ray_init_timeout; i+=5 )); do
active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'` active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'`
if [ $active_nodes -eq $ray_cluster_size ]; then if [ $active_nodes -eq $ray_cluster_size ]; then
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Example online usage of Score API.
Run `vllm serve <model> --task score` to start up the server in vLLM.
"""
import argparse
import pprint
import requests
def post_http_request(prompt: dict, api_url: str) -> requests.Response:
    """POST ``prompt`` as a JSON body to ``api_url`` and return the response.

    The request carries a fixed ``User-Agent: Test Client`` header. The
    response is returned as-is; no status check is performed here.
    """
    return requests.post(
        api_url,
        headers={"User-Agent": "Test Client"},
        json=prompt,
    )
def parse_args():
    """Parse the command line and return the resulting namespace.

    Recognized options: ``--host`` (str, default ``localhost``), ``--port``
    (int, default ``8000``) and ``--model`` (str, default
    ``jinaai/jina-reranker-m0``).
    """
    cli = argparse.ArgumentParser()
    option_specs = (
        ("--host", str, "localhost"),
        ("--port", int, 8000),
        ("--model", str, "jinaai/jina-reranker-m0"),
    )
    for flag, value_type, fallback in option_specs:
        cli.add_argument(flag, type=value_type, default=fallback)
    return cli.parse_args()
def main(args):
    """Send one score request pairing a text query with two images.

    Builds the ``/score`` URL from ``args.host`` and ``args.port``, posts a
    prompt whose ``text_1`` is a plain string and whose ``text_2`` is a
    content list of image URLs, then pretty-prints the prompt and the JSON
    body of the response.
    """
    image_links = (
        "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png",
        "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png",
    )
    image_documents = {
        "content": [
            {"type": "image_url", "image_url": {"url": link}}
            for link in image_links
        ]
    }
    prompt = {
        "model": args.model,
        "text_1": "slm markdown",
        "text_2": image_documents,
    }

    # The request is issued before anything is printed, as in the original.
    score_response = post_http_request(
        prompt=prompt,
        api_url=f"http://{args.host}:{args.port}/score",
    )

    print("\nPrompt when text_1 is string and text_2 is a image list:")
    pprint.pprint(prompt)
    print("\nScore Response:")
    pprint.pprint(score_response.json())


if __name__ == "__main__":
    args = parse_args()
    main(args)
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
""" """
Example to deploy DeepSeek R1 or V3 with Ray Serve LLM. Deploy DeepSeek R1 or V3 with Ray Serve LLM.
See more details at:
https://docs.ray.io/en/latest/serve/tutorials/serve-deepseek.html Ray Serve LLM is a scalable and production-grade model serving library built
And see Ray Serve LLM documentation at: on the Ray distributed computing framework and first-class support for the vLLM engine.
https://docs.ray.io/en/latest/serve/llm/serving-llms.html
Key features:
- Automatic scaling, back-pressure, and load balancing across a Ray cluster.
- Unified multi-node multi-model deployment.
- Exposes an OpenAI-compatible HTTP API.
- Multi-LoRA support with shared base models.
Run `python3 ray_serve_deepseek.py` to deploy the model. Run `python3 ray_serve_deepseek.py` to launch an endpoint.
Learn more in the official Ray Serve LLM documentation:
https://docs.ray.io/en/latest/serve/llm/serving-llms.html
""" """
from ray import serve from ray import serve
...@@ -16,9 +24,8 @@ from ray.serve.llm import LLMConfig, build_openai_app ...@@ -16,9 +24,8 @@ from ray.serve.llm import LLMConfig, build_openai_app
llm_config = LLMConfig( llm_config = LLMConfig(
model_loading_config={ model_loading_config={
"model_id": "deepseek", "model_id": "deepseek",
# Since DeepSeek model is huge, it is recommended to pre-download # Pre-downloading the model to local storage is recommended since
# the model to local disk, say /path/to/the/model and specify: # the model is large. Set model_source="/path/to/the/model".
# model_source="/path/to/the/model"
"model_source": "deepseek-ai/DeepSeek-R1", "model_source": "deepseek-ai/DeepSeek-R1",
}, },
deployment_config={ deployment_config={
...@@ -27,10 +34,10 @@ llm_config = LLMConfig( ...@@ -27,10 +34,10 @@ llm_config = LLMConfig(
"max_replicas": 1, "max_replicas": 1,
} }
}, },
# Change to the accelerator type of the node # Set to the node's accelerator type.
accelerator_type="H100", accelerator_type="H100",
runtime_env={"env_vars": {"VLLM_USE_V1": "1"}}, runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
# Customize engine arguments as needed (e.g. vLLM engine kwargs) # Customize engine arguments as required (for example, vLLM engine kwargs).
engine_kwargs={ engine_kwargs={
"tensor_parallel_size": 8, "tensor_parallel_size": 8,
"pipeline_parallel_size": 2, "pipeline_parallel_size": 2,
...@@ -44,6 +51,6 @@ llm_config = LLMConfig( ...@@ -44,6 +51,6 @@ llm_config = LLMConfig(
}, },
) )
# Deploy the application # Deploy the application.
llm_app = build_openai_app({"llm_configs": [llm_config]}) llm_app = build_openai_app({"llm_configs": [llm_config]})
serve.run(llm_app) serve.run(llm_app)
#!/bin/bash #!/bin/bash
#
# Launch a Ray cluster inside Docker for vLLM inference.
#
# This script can start either a head node or a worker node, depending on the
# --head or --worker flag provided as the third positional argument.
#
# Usage:
# 1. Designate one machine as the head node and execute:
# bash run_cluster.sh \
# vllm/vllm-openai \
# <head_node_ip> \
# --head \
# /abs/path/to/huggingface/cache \
# -e VLLM_HOST_IP=<head_node_ip>
#
# 2. On every worker machine, execute:
# bash run_cluster.sh \
# vllm/vllm-openai \
# <head_node_ip> \
# --worker \
# /abs/path/to/huggingface/cache \
# -e VLLM_HOST_IP=<worker_node_ip>
#
# Each worker requires a unique VLLM_HOST_IP value.
# Keep each terminal session open. Closing a session stops the associated Ray
# node and thereby shuts down the entire cluster.
# Every machine must be reachable at the supplied IP address.
#
# The container is named "node-<random_suffix>". To open a shell inside
# a container after launch, use:
# docker exec -it node-<random_suffix> /bin/bash
#
# Then, you can execute vLLM commands on the Ray cluster as if it were a
# single machine, e.g. vllm serve ...
#
# To stop the container, use:
# docker stop node-<random_suffix>
# Check for minimum number of required arguments # Check for minimum number of required arguments.
if [ $# -lt 4 ]; then if [ $# -lt 4 ]; then
echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]" echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]"
exit 1 exit 1
fi fi
# Assign the first three arguments and shift them away # Extract the mandatory positional arguments and remove them from $@.
DOCKER_IMAGE="$1" DOCKER_IMAGE="$1"
HEAD_NODE_ADDRESS="$2" HEAD_NODE_ADDRESS="$2"
NODE_TYPE="$3" # Should be --head or --worker NODE_TYPE="$3" # Should be --head or --worker.
PATH_TO_HF_HOME="$4" PATH_TO_HF_HOME="$4"
shift 4 shift 4
# Additional arguments are passed directly to the Docker command # Preserve any extra arguments so they can be forwarded to Docker.
ADDITIONAL_ARGS=("$@") ADDITIONAL_ARGS=("$@")
# Validate node type # Validate the NODE_TYPE argument.
if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
echo "Error: Node type must be --head or --worker" echo "Error: Node type must be --head or --worker"
exit 1 exit 1
fi fi
# Define a function to cleanup on EXIT signal # Generate a unique container name with random suffix.
# Docker container names must be unique on each host.
# The random suffix allows multiple Ray containers to run simultaneously on the same machine,
# for example, on a multi-GPU machine.
CONTAINER_NAME="node-${RANDOM}"
# Define a cleanup routine that removes the container when the script exits.
# This prevents orphaned containers from accumulating if the script is interrupted.
cleanup() { cleanup() {
docker stop node docker stop "${CONTAINER_NAME}"
docker rm node docker rm "${CONTAINER_NAME}"
} }
trap cleanup EXIT trap cleanup EXIT
# Command setup for head or worker node # Build the Ray start command based on the node role.
# The head node manages the cluster and accepts connections on port 6379,
# while workers connect to the head's address.
RAY_START_CMD="ray start --block" RAY_START_CMD="ray start --block"
if [ "${NODE_TYPE}" == "--head" ]; then if [ "${NODE_TYPE}" == "--head" ]; then
RAY_START_CMD+=" --head --port=6379" RAY_START_CMD+=" --head --port=6379"
...@@ -37,11 +83,15 @@ else ...@@ -37,11 +83,15 @@ else
RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379" RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379"
fi fi
# Run the docker command with the user specified parameters and additional arguments # Launch the container with the assembled parameters.
# --network host: Allows Ray nodes to communicate directly via host networking
# --shm-size 10.24g: Increases shared memory
# --gpus all: Gives container access to all GPUs on the host
# -v HF_HOME: Mounts HuggingFace cache to avoid re-downloading models
docker run \ docker run \
--entrypoint /bin/bash \ --entrypoint /bin/bash \
--network host \ --network host \
--name node \ --name "${CONTAINER_NAME}" \
--shm-size 10.24g \ --shm-size 10.24g \
--gpus all \ --gpus all \
-v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \ -v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import argparse import argparse
import dataclasses import dataclasses
import json import json
import logging
import os import os
import uuid import uuid
...@@ -15,9 +16,13 @@ from vllm.model_executor.model_loader.tensorizer import ( ...@@ -15,9 +16,13 @@ from vllm.model_executor.model_loader.tensorizer import (
TensorizerConfig, TensorizerConfig,
tensorize_lora_adapter, tensorize_lora_adapter,
tensorize_vllm_model, tensorize_vllm_model,
tensorizer_kwargs_arg,
) )
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
logger = logging.getLogger()
# yapf conflicts with isort for this docstring # yapf conflicts with isort for this docstring
# yapf: disable # yapf: disable
""" """
...@@ -119,7 +124,7 @@ vllm serve <model_path> \ ...@@ -119,7 +124,7 @@ vllm serve <model_path> \
""" """
def parse_args(): def get_parser():
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="An example script that can be used to serialize and " description="An example script that can be used to serialize and "
"deserialize vLLM models. These models " "deserialize vLLM models. These models "
...@@ -135,13 +140,13 @@ def parse_args(): ...@@ -135,13 +140,13 @@ def parse_args():
required=False, required=False,
help="Path to a LoRA adapter to " help="Path to a LoRA adapter to "
"serialize along with model tensors. This can then be deserialized " "serialize along with model tensors. This can then be deserialized "
"along with the model by passing a tensorizer_config kwarg to " "along with the model by instantiating a TensorizerConfig object, "
"LoRARequest with type TensorizerConfig. See the docstring for this " "creating a dict from it with TensorizerConfig.to_serializable(), "
"for a usage example." "and passing it to LoRARequest's initializer with the kwarg "
"tensorizer_config_dict."
) )
subparsers = parser.add_subparsers(dest='command') subparsers = parser.add_subparsers(dest='command', required=True)
serialize_parser = subparsers.add_parser( serialize_parser = subparsers.add_parser(
'serialize', help="Serialize a model to `--serialized-directory`") 'serialize', help="Serialize a model to `--serialized-directory`")
...@@ -171,6 +176,14 @@ def parse_args(): ...@@ -171,6 +176,14 @@ def parse_args():
"where `suffix` is given by `--suffix` or a random UUID if not " "where `suffix` is given by `--suffix` or a random UUID if not "
"provided.") "provided.")
serialize_parser.add_argument(
"--serialization-kwargs",
type=tensorizer_kwargs_arg,
required=False,
help=("A JSON string containing additional keyword arguments to "
"pass to Tensorizer's TensorSerializer during "
"serialization."))
serialize_parser.add_argument( serialize_parser.add_argument(
"--keyfile", "--keyfile",
type=str, type=str,
...@@ -186,9 +199,17 @@ def parse_args(): ...@@ -186,9 +199,17 @@ def parse_args():
deserialize_parser.add_argument( deserialize_parser.add_argument(
"--path-to-tensors", "--path-to-tensors",
type=str, type=str,
required=True, required=False,
help="The local path or S3 URI to the model tensors to deserialize. ") help="The local path or S3 URI to the model tensors to deserialize. ")
deserialize_parser.add_argument(
"--serialized-directory",
type=str,
required=False,
help="Directory with model artifacts for loading. Assumes a "
"model.tensors file exists therein. Can supersede "
"--path-to-tensors.")
deserialize_parser.add_argument( deserialize_parser.add_argument(
"--keyfile", "--keyfile",
type=str, type=str,
...@@ -196,11 +217,27 @@ def parse_args(): ...@@ -196,11 +217,27 @@ def parse_args():
help=("Path to a binary key to use to decrypt the model weights," help=("Path to a binary key to use to decrypt the model weights,"
" if the model was serialized with encryption")) " if the model was serialized with encryption"))
TensorizerArgs.add_cli_args(deserialize_parser) deserialize_parser.add_argument(
"--deserialization-kwargs",
type=tensorizer_kwargs_arg,
required=False,
help=("A JSON string containing additional keyword arguments to "
"pass to Tensorizer's `TensorDeserializer` during "
"deserialization."))
return parser.parse_args() TensorizerArgs.add_cli_args(deserialize_parser)
return parser
def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
cfg: TensorizerConfig):
for k, v in extra_cfg.items():
if hasattr(cfg, k):
setattr(cfg, k, v)
logger.info(
"Updating TensorizerConfig with %s from "
"--model-loader-extra-config provided", k
)
def deserialize(args, tensorizer_config): def deserialize(args, tensorizer_config):
if args.lora_path: if args.lora_path:
...@@ -230,7 +267,8 @@ def deserialize(args, tensorizer_config): ...@@ -230,7 +267,8 @@ def deserialize(args, tensorizer_config):
lora_request=LoRARequest("sql-lora", lora_request=LoRARequest("sql-lora",
1, 1,
args.lora_path, args.lora_path,
tensorizer_config = tensorizer_config) tensorizer_config_dict = tensorizer_config
.to_serializable())
) )
) )
else: else:
...@@ -243,7 +281,8 @@ def deserialize(args, tensorizer_config): ...@@ -243,7 +281,8 @@ def deserialize(args, tensorizer_config):
def main(): def main():
args = parse_args() parser = get_parser()
args = parser.parse_args()
s3_access_key_id = (getattr(args, 's3_access_key_id', None) s3_access_key_id = (getattr(args, 's3_access_key_id', None)
or os.environ.get("S3_ACCESS_KEY_ID", None)) or os.environ.get("S3_ACCESS_KEY_ID", None))
...@@ -265,13 +304,24 @@ def main(): ...@@ -265,13 +304,24 @@ def main():
else: else:
keyfile = None keyfile = None
extra_config = {}
if args.model_loader_extra_config: if args.model_loader_extra_config:
config = json.loads(args.model_loader_extra_config) extra_config = json.loads(args.model_loader_extra_config)
tensorizer_args = \
TensorizerConfig(**config)._construct_tensorizer_args()
tensorizer_args.tensorizer_uri = args.path_to_tensors tensorizer_dir = (args.serialized_directory or
else: extra_config.get("tensorizer_dir"))
tensorizer_args = None tensorizer_uri = (getattr(args, "path_to_tensors", None)
or extra_config.get("tensorizer_uri"))
if tensorizer_dir and tensorizer_uri:
parser.error("--serialized-directory and --path-to-tensors "
"cannot both be provided")
if not tensorizer_dir and not tensorizer_uri:
parser.error("Either --serialized-directory or --path-to-tensors "
"must be provided")
if args.command == "serialize": if args.command == "serialize":
eng_args_dict = {f.name: getattr(args, f.name) for f in eng_args_dict = {f.name: getattr(args, f.name) for f in
...@@ -281,7 +331,7 @@ def main(): ...@@ -281,7 +331,7 @@ def main():
argparse.Namespace(**eng_args_dict) argparse.Namespace(**eng_args_dict)
) )
input_dir = args.serialized_directory.rstrip('/') input_dir = tensorizer_dir.rstrip('/')
suffix = args.suffix if args.suffix else uuid.uuid4().hex suffix = args.suffix if args.suffix else uuid.uuid4().hex
base_path = f"{input_dir}/vllm/{model_ref}/{suffix}" base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
if engine_args.tensor_parallel_size > 1: if engine_args.tensor_parallel_size > 1:
...@@ -292,21 +342,29 @@ def main(): ...@@ -292,21 +342,29 @@ def main():
tensorizer_config = TensorizerConfig( tensorizer_config = TensorizerConfig(
tensorizer_uri=model_path, tensorizer_uri=model_path,
encryption_keyfile=keyfile, encryption_keyfile=keyfile,
**credentials) serialization_kwargs=args.serialization_kwargs or {},
**credentials
)
if args.lora_path: if args.lora_path:
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
tensorize_lora_adapter(args.lora_path, tensorizer_config) tensorize_lora_adapter(args.lora_path, tensorizer_config)
merge_extra_config_with_tensorizer_config(extra_config,
tensorizer_config)
tensorize_vllm_model(engine_args, tensorizer_config) tensorize_vllm_model(engine_args, tensorizer_config)
elif args.command == "deserialize": elif args.command == "deserialize":
if not tensorizer_args: tensorizer_config = TensorizerConfig(
tensorizer_config = TensorizerConfig( tensorizer_uri=args.path_to_tensors,
tensorizer_uri=args.path_to_tensors, tensorizer_dir=args.serialized_directory,
encryption_keyfile = keyfile, encryption_keyfile=keyfile,
**credentials deserialization_kwargs=args.deserialization_kwargs or {},
) **credentials
)
merge_extra_config_with_tensorizer_config(extra_config,
tensorizer_config)
deserialize(args, tensorizer_config) deserialize(args, tensorizer_config)
else: else:
raise ValueError("Either serialize or deserialize must be specified.") raise ValueError("Either serialize or deserialize must be specified.")
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %} {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %} {%- endif %}
{%- endif %} {%- endif %}
{%- endfor %} {%- endfor -%}
{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #} {#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
{% if tools is defined and tools is not none %} {% if tools is defined and tools is not none %}
...@@ -27,8 +27,8 @@ ...@@ -27,8 +27,8 @@
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %} {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %} {% endif %}
{{ bos_token }} {{- bos_token }}
{{ ns.system_prompt }} {{- ns.system_prompt }}
{%- for message in messages %} {%- for message in messages %}
{% set content = message['content'] %} {% set content = message['content'] %}
{%- if message['role'] == 'user' %} {%- if message['role'] == 'user' %}
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %} {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- set ns.is_last_user = false -%} {%- set ns.is_last_user = false -%}
{%- if ns.is_tool %} {%- if ns.is_tool %}
{{'<|tool▁outputs▁end|>'}} {{- '<|tool▁outputs▁end|>'}}
{%- endif %} {%- endif %}
{%- set ns.is_first = false %} {%- set ns.is_first = false %}
{%- set ns.is_tool = false -%} {%- set ns.is_tool = false -%}
...@@ -53,40 +53,40 @@ ...@@ -53,40 +53,40 @@
{%- for tool in message['tool_calls'] %} {%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %} {%- if not ns.is_first %}
{%- if content is none %} {%- if content is none %}
{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}} {{- '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- else %} {%- else %}
{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}} {{- content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %} {%- endif %}
{%- set ns.is_first = true -%} {%- set ns.is_first = true -%}
{%- else %} {%- else %}
{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}} {{- '\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %} {%- endif %}
{%- endfor %} {%- endfor %}
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
{%- endif %} {%- endif %}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%} {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
{%- set ns.is_last_user = false -%} {%- set ns.is_last_user = false -%}
{%- if ns.is_tool %} {%- if ns.is_tool %}
{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}} {{- '<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}
{%- set ns.is_tool = false -%} {%- set ns.is_tool = false -%}
{%- else %} {%- else %}
{{content + '<|end▁of▁sentence|>'}} {{- content + '<|end▁of▁sentence|>'}}
{%- endif %} {%- endif %}
{%- endif %} {%- endif %}
{%- if message['role'] == 'tool' %} {%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%} {%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%} {%- set ns.is_tool = true -%}
{%- if ns.is_output_first %} {%- if ns.is_output_first %}
{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}} {{- '<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{%- set ns.is_output_first = false %} {%- set ns.is_output_first = false %}
{%- else %} {%- else %}
{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}} {{- '\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{%- endif %} {%- endif %}
{%- endif %} {%- endif %}
{%- endfor -%} {%- endfor -%}
{% if ns.is_tool %} {% if ns.is_tool %}
{{'<|tool▁outputs▁end|>'}} {{- '<|tool▁outputs▁end|>'}}
{% endif %} {%- endif %}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %} {% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
{{'<|Assistant|>'}} {{- '<|Assistant|>'}}
{% endif %} {%- endif %}
\ No newline at end of file
{% set loop_messages = messages %}
{% if tools %}
{% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}
{% set weekday_cn = weekday_map[strftime_now('%A')] %}
{% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}
{% set datetime_str = datetime_str + ' ' + weekday_cn %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的。
如果没有一个函数可以使用,请直接使用自然语言回复用户,以助手:开头。
如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息,以助手:开头。
如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户,以助手:开头。
你应该只在工具调用部分返回函数调用。如果你决定调用任何函数,你必须将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表,格式为JSON。
' %}
{% set content_tmp = content_tmp + '
' + tools | tojson + '
' %}
{% if message['role'] == 'system' %}
{% set content_tmp = content_tmp + '
额外要求:
' + content + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。如果额外要求里有格式要求,请忽略,以此处为准。
否则,请参考开头说的三种情况,以助手:开头进行回复。
如果额外要求里有时间信息,就以额外要求里的时间为准,否则,参考当前时间:' + datetime_str %}
{% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}
{% elif message['role'] == 'user' %}
{% set content_tmp = content_tmp + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。
否则,请参考开头说的三种情况,以助手:开头进行回复。
当前时间:' + datetime_str %}
{% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}
{% set content = content_tmp + '用户:' + content + '<|extra_0|>' %}
{% endif %}
{% else %}
{% if message['role'] == 'user' %}
{% set content = '用户:' + content + '<|extra_0|>' %}
{% elif message['role'] == 'assistant' %}
{% if 'tool_calls' in message %}
{% set tool_calls = message['tool_calls'] %}
{% set ns = namespace(tool_calls="[") %}
{% for tool_call in tool_calls %}
{% set function = tool_call['function'] %}
{% set name = function['name'] %}
{% set ns.tool_calls = ns.tool_calls + '{"name": "' + name + '", '%}
{% set arguments = function['arguments'] %}
{% if arguments is not string %}
{% set arguments = arguments | tojson %}
{% endif %}
{% set ns.tool_calls = ns.tool_calls + '"arguments": ' + arguments + '}' %}
{% if not loop.last %}
{% set ns.tool_calls = ns.tool_calls + ', '%}
{% endif %}
{% endfor %}
{% set ns.tool_calls = ns.tool_calls + ']' %}
{% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}
{% else %}
{% set content = '助手:' + content %}
{% endif %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{#- Tool results may be structured (dict/list); serialize them to JSON so the string concatenation that follows works. -#}
{% if content is not string %}
{% set content = content | tojson %}
{% endif %}
{% set content = '<tool_response>' + content + '</tool_response>' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{% endif %}
{{- content -}}
{% endfor %}
{% else %}
{% set context = {'has_head': true} %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% if content == '' %}
{% set _ = context.update({'has_head': false}) %}
{% elif message['role'] == 'system' %}
{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}
{% endif %}
{% endif %}
{% if message['role'] == 'user' %}
{% if loop.index0 == 1 and not context.has_head %}
{% set content = '<|startoftext|>' + content %}
{% endif %}
{% if loop.index0 == 1 and context.has_head %}
{% set content = content + '<|extra_0|>' %}
{% else %}
{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}
{% endif %}
{% elif message['role'] == 'assistant' %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{{- content -}}
{% endfor %}
{% endif %}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n' }}
{%- endif %}
...@@ -3,6 +3,7 @@ site_url: https://docs.vllm.ai ...@@ -3,6 +3,7 @@ site_url: https://docs.vllm.ai
repo_url: https://github.com/vllm-project/vllm repo_url: https://github.com/vllm-project/vllm
edit_uri: edit/main/docs/ edit_uri: edit/main/docs/
exclude_docs: | exclude_docs: |
argparse
*.inc.md *.inc.md
*.template.md *.template.md
theme: theme:
...@@ -47,6 +48,7 @@ theme: ...@@ -47,6 +48,7 @@ theme:
hooks: hooks:
- docs/mkdocs/hooks/remove_announcement.py - docs/mkdocs/hooks/remove_announcement.py
- docs/mkdocs/hooks/generate_examples.py - docs/mkdocs/hooks/generate_examples.py
- docs/mkdocs/hooks/generate_argparse.py
- docs/mkdocs/hooks/url_schemes.py - docs/mkdocs/hooks/url_schemes.py
# Required to stop api-autonav from raising an error # Required to stop api-autonav from raising an error
...@@ -59,6 +61,7 @@ plugins: ...@@ -59,6 +61,7 @@ plugins:
- search - search
- autorefs - autorefs
- awesome-nav - awesome-nav
- glightbox
# For API reference generation # For API reference generation
- api-autonav: - api-autonav:
modules: ["vllm"] modules: ["vllm"]
......
...@@ -6,7 +6,7 @@ requires = [ ...@@ -6,7 +6,7 @@ requires = [
"packaging>=24.2", "packaging>=24.2",
"setuptools>=77.0.3,<80.0.0", "setuptools>=77.0.3,<80.0.0",
"setuptools-scm>=8.0", "setuptools-scm>=8.0",
"torch == 2.4.1", "torch == 2.5.1",
"wheel", "wheel",
"jinja2", "jinja2",
] ]
...@@ -72,8 +72,6 @@ line-length = 80 ...@@ -72,8 +72,6 @@ line-length = 80
"vllm/core/**/*.py" = ["UP006", "UP035"] "vllm/core/**/*.py" = ["UP006", "UP035"]
"vllm/engine/**/*.py" = ["UP006", "UP035"] "vllm/engine/**/*.py" = ["UP006", "UP035"]
"vllm/executor/**/*.py" = ["UP006", "UP035"] "vllm/executor/**/*.py" = ["UP006", "UP035"]
"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
"vllm/worker/**/*.py" = ["UP006", "UP035"] "vllm/worker/**/*.py" = ["UP006", "UP035"]
# Python 3.8 typing - skip utils for ROCm # Python 3.8 typing - skip utils for ROCm
"vllm/utils/__init__.py" = ["UP006", "UP035"] "vllm/utils/__init__.py" = ["UP006", "UP035"]
...@@ -174,3 +172,186 @@ respect-ignore-files = true ...@@ -174,3 +172,186 @@ respect-ignore-files = true
[tool.ty.environment] [tool.ty.environment]
python = "./.venv" python = "./.venv"
[tool.typos.files]
# these files may be written in non english words
extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
"benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
"vllm/third_party/*"]
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[tool.typos.default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
# Regex patterns for identifiers the typos checker should always accept.
# NOTE(review): "[UE4M3|ue4m3]" is a regex character class, not an alternation: it
# matches any ONE character out of U, E, 4, M, 3, |, u, e, m. Wrapped in ".*", it
# therefore matches every identifier containing e.g. an "e" or "u", which looks far
# broader than intended. "(UE4M3|ue4m3)" would express the alternation; confirm before
# changing, since tightening the pattern may surface new findings.
# NOTE(review): "NVML_*" means "NVML" followed by zero or more "_" characters;
# presumably "NVML_.*" was meant. Verify.
extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
".*[Tt]h[rR].*"]
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.default.extend-identifiers]
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
# splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"
[tool.typos.default.extend-words]
iy = "iy"
tendencias = "tendencias"
# intel cpu features
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"
[tool.typos.type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
fo = "fo"
ba = "ba"
[tool.typos.type.py.extend-words]
[tool.typos.type.cpp]
extend-glob = ["*.cu"]
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cpp.extend-identifiers]
countr_one = "countr_one"
k_ot = "k_ot"
ot = "ot"
[tool.typos.type.cpp.extend-words]
[tool.typos.type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.rust.extend-identifiers]
flate2 = "flate2"
[tool.typos.type.rust.extend-words]
ser = "ser"
[tool.typos.type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.lock.extend-identifiers]
[tool.typos.type.lock.extend-words]
[tool.typos.type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.jl.extend-identifiers]
[tool.typos.type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[tool.typos.type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.go.extend-identifiers]
flate = "flate"
[tool.typos.type.go.extend-words]
[tool.typos.type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.css.extend-identifiers]
nd = "nd"
[tool.typos.type.css.extend-words]
[tool.typos.type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.man.extend-identifiers]
Nd = "Nd"
[tool.typos.type.man.extend-words]
[tool.typos.type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cert.extend-identifiers]
[tool.typos.type.cert.extend-words]
[tool.typos.type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.sh.extend-identifiers]
ot = "ot"
[tool.typos.type.sh.extend-words]
[tool.typos.type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.vimscript.extend-identifiers]
windo = "windo"
[tool.typos.type.vimscript.extend-words]
...@@ -4,7 +4,7 @@ ninja ...@@ -4,7 +4,7 @@ ninja
packaging>=24.2 packaging>=24.2
setuptools>=77.0.3,<80.0.0 setuptools>=77.0.3,<80.0.0
setuptools-scm>=8 setuptools-scm>=8
torch==2.4.1 torch==2.5.1
wheel wheel
jinja2>=3.1.6 jinja2>=3.1.6
regex regex
...@@ -7,13 +7,13 @@ requests >= 2.26.0 ...@@ -7,13 +7,13 @@ requests >= 2.26.0
tqdm tqdm
blake3 blake3
py-cpuinfo py-cpuinfo
transformers >= 4.51.1 transformers >= 4.53.2
huggingface-hub[hf_xet] >= 0.33.0 # Required for Xet downloads. huggingface-hub[hf_xet] >= 0.33.0 # Required for Xet downloads.
tokenizers >= 0.21.1 # Required for fast incremental detokenization. tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf # Required by LlamaTokenizer. protobuf # Required by LlamaTokenizer.
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp aiohttp
openai >= 1.52.0, <= 1.90.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support) openai >= 1.87.0, <= 1.90.0 # Ensure modern openai package (ensure ResponsePrompt exists in type.responses and max_completion_tokens field support)
pydantic >= 2.10 pydantic >= 2.10
prometheus_client >= 0.18.0 prometheus_client >= 0.18.0
pillow # Required for image processing pillow # Required for image processing
...@@ -21,9 +21,11 @@ prometheus-fastapi-instrumentator >= 7.0.0 ...@@ -21,9 +21,11 @@ prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer >= 0.10.11, < 0.11 lm-format-enforcer >= 0.10.11, < 0.11
llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
outlines == 0.1.11 outlines_core == 0.2.10
# required for outlines backend disk cache
diskcache == 5.6.3
lark == 1.2.2 lark == 1.2.2
xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" xgrammar == 0.1.21; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
typing_extensions >= 4.10 typing_extensions >= 4.10
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
partial-json-parser # used for parsing partial JSON outputs partial-json-parser # used for parsing partial JSON outputs
...@@ -31,17 +33,18 @@ pyzmq >= 25.0.0 ...@@ -31,17 +33,18 @@ pyzmq >= 25.0.0
msgspec msgspec
gguf >= 0.13.0 gguf >= 0.13.0
importlib_metadata; python_version < '3.10' importlib_metadata; python_version < '3.10'
mistral_common[opencv] >= 1.5.4 # requires numpy>=1.25 #1.6.2 mistral_common[image,audio] >= 1.5.4 # requires numpy>=1.25 #1.8.2
opencv-python-headless >= 4.11.0 # required for video IO opencv-python-headless >= 4.11.0 # required for video IO
pyyaml pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL. einops # Required for Qwen2-VL.
compressed-tensors == 0.10.2 # required for compressed-tensors compressed-tensors == 0.10.2 # required for compressed-tensors
depyf==0.18.0 # required for profiling and debugging with compilation config depyf==0.19.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files watchfiles # required for http server to monitor the updates of TLS files
python-json-logger # Used by logging as per examples/others/logging_configuration.md python-json-logger # Used by logging as per examples/others/logging_configuration.md
scipy # Required for phi-4-multimodal-instruct scipy # Required for phi-4-multimodal-instruct
ninja # Required for xgrammar, rocm, tpu, xpu ninja # Required for xgrammar, rocm, tpu, xpu
pybase64 # fast base64 implementation pybase64 # fast base64 implementation
cbor2 # Required for cross-language serialization of hashable objects
...@@ -24,6 +24,4 @@ datasets # for benchmark scripts ...@@ -24,6 +24,4 @@ datasets # for benchmark scripts
# Intel Extension for PyTorch, only for x86_64 CPUs # Intel Extension for PyTorch, only for x86_64 CPUs
intel-openmp==2024.2.1; platform_machine == "x86_64" intel-openmp==2024.2.1; platform_machine == "x86_64"
intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218 intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
py-libnuma; platform_system != "Darwin"
psutil; platform_system != "Darwin"
triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile. triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
...@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9' ...@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs # Dependencies for NVIDIA GPUs
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1. ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.7.0 torch==2.7.1
torchaudio==2.7.0 torchaudio==2.7.1
# These must be updated alongside torch # These must be updated alongside torch
torchvision==0.22.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.30 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
xformers==0.0.30; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7 xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
...@@ -4,6 +4,24 @@ mkdocs-material ...@@ -4,6 +4,24 @@ mkdocs-material
mkdocstrings-python mkdocstrings-python
mkdocs-gen-files mkdocs-gen-files
mkdocs-awesome-nav mkdocs-awesome-nav
mkdocs-glightbox
python-markdown-math python-markdown-math
regex regex
ruff ruff
# Required for argparse hook only
-f https://download.pytorch.org/whl/cpu
cachetools
cbor2
cloudpickle
fastapi
msgspec
openai
partial-json-parser
pillow
psutil
pybase64
pydantic
torch
transformers
zmq
# Common dependencies
-r common.txt
# Dependencies for HPU code
ray
triton==3.1.0
pandas
numpy==1.26.4
tabulate
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@f1f6624
# testing # testing
pytest pytest
tensorizer>=2.9.0 tensorizer==2.10.1
pytest-forked pytest-forked
pytest-asyncio pytest-asyncio
pytest-rerunfailures pytest-rerunfailures
...@@ -23,7 +23,7 @@ jiwer # required for audio tests ...@@ -23,7 +23,7 @@ jiwer # required for audio tests
timm # required for internvl test timm # required for internvl test
transformers_stream_generator # required for qwen-vl test transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.6.2 # required for pixtral test mistral_common[image,audio] >= 1.8.2 # required for voxtral test
num2words # required for smolvlm test num2words # required for smolvlm test
opencv-python-headless >= 4.11.0 # required for video test opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
......
...@@ -11,12 +11,17 @@ datasets ...@@ -11,12 +11,17 @@ datasets
ray>=2.10.0,<2.45.0 ray>=2.10.0,<2.45.0
peft peft
pytest-asyncio pytest-asyncio
tensorizer>=2.9.0 tensorizer==2.10.1
packaging>=24.2 packaging>=24.2
setuptools>=77.0.3,<80.0.0 setuptools>=77.0.3,<80.0.0
setuptools-scm>=8 setuptools-scm>=8
runai-model-streamer==0.11.0 runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0 runai-model-streamer-s3==0.11.0
conch-triton-kernels==1.2.1
numa
python-multipart
pytrie
setuptools_scm>=8 setuptools_scm>=8
cmake==3.29 cmake==3.29
...@@ -25,7 +30,4 @@ triton == 3.0.0 ...@@ -25,7 +30,4 @@ triton == 3.0.0
flash_attn == 2.6.1 flash_attn == 2.6.1
flash_mla == 1.0.0 flash_mla == 1.0.0
lmslim == 0.3.0 lmslim == 0.3.0
numa
python-multipart
pytrie
# testing # testing
pytest pytest
tensorizer>=2.9.0 tensorizer==2.10.1
pytest-forked pytest-forked
pytest-asyncio pytest-asyncio
pytest-rerunfailures pytest-rerunfailures
...@@ -22,19 +22,20 @@ sentence-transformers # required for embedding tests ...@@ -22,19 +22,20 @@ sentence-transformers # required for embedding tests
soundfile # required for audio tests soundfile # required for audio tests
jiwer # required for audio tests jiwer # required for audio tests
timm # required for internvl test timm # required for internvl test
torch==2.7.0 torch==2.7.1
torchaudio==2.7.0 torchaudio==2.7.1
torchvision==0.22.0 torchvision==0.22.1
transformers_stream_generator # required for qwen-vl test transformers_stream_generator # required for qwen-vl test
mamba_ssm # required for plamo2 test mamba_ssm # required for plamo2 test
matplotlib # required for qwen-vl test matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.6.2 # required for pixtral test mistral_common[image,audio] >= 1.8.2 # required for voxtral test
num2words # required for smolvlm test num2words # required for smolvlm test
open_clip_torch==2.32.0 # Required for nemotron_vl test
opencv-python-headless >= 4.11.0 # required for video test opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.8 # required for model evaluation test lm-eval[api]==0.4.8 # required for model evaluation test
mteb[bm25s]>=1.38.11, <2 # required for mteb test mteb[bm25s]>=1.38.11, <2 # required for mteb test
transformers==4.52.4 transformers==4.53.2
tokenizers==0.21.1 tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads. huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
...@@ -53,3 +54,4 @@ runai-model-streamer==0.11.0 ...@@ -53,3 +54,4 @@ runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0 runai-model-streamer-s3==0.11.0
fastsafetensors>=0.1.10 fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10 pydantic>=2.10 # 2.9 leads to error on python 3.10
terratorch==1.1rc2 # required for PrithviMAE test
\ No newline at end of file
...@@ -6,6 +6,10 @@ accelerate==1.0.1 ...@@ -6,6 +6,10 @@ accelerate==1.0.1
# via # via
# lm-eval # lm-eval
# peft # peft
aenum==3.1.16
# via lightly
affine==2.4.0
# via rasterio
aiohappyeyeballs==2.4.3 aiohappyeyeballs==2.4.3
# via aiohttp # via aiohttp
aiohttp==3.10.11 aiohttp==3.10.11
...@@ -21,8 +25,18 @@ aiosignal==1.3.1 ...@@ -21,8 +25,18 @@ aiosignal==1.3.1
# via # via
# aiohttp # aiohttp
# ray # ray
albucore==0.0.16
# via terratorch
albumentations==1.4.6
# via terratorch
alembic==1.16.4
# via mlflow
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.6.2.post1 anyio==4.6.2.post1
# via # via
# httpx # httpx
...@@ -34,10 +48,12 @@ arrow==1.3.0 ...@@ -34,10 +48,12 @@ arrow==1.3.0
attrs==24.2.0 attrs==24.2.0
# via # via
# aiohttp # aiohttp
# fiona
# hypothesis # hypothesis
# jsonlines # jsonlines
# jsonschema # jsonschema
# pytest-subtests # pytest-subtests
# rasterio
# referencing # referencing
audioread==3.0.1 audioread==3.0.1
# via librosa # via librosa
...@@ -46,9 +62,13 @@ backoff==2.2.1 ...@@ -46,9 +62,13 @@ backoff==2.2.1
# -r requirements/test.in # -r requirements/test.in
# schemathesis # schemathesis
bitsandbytes==0.46.1 bitsandbytes==0.46.1
# via -r requirements/test.in # via
# -r requirements/test.in
# lightning
black==24.10.0 black==24.10.0
# via datamodel-code-generator # via datamodel-code-generator
blinker==1.9.0
# via flask
blobfile==3.0.0 blobfile==3.0.0
# via -r requirements/test.in # via -r requirements/test.in
bm25s==0.2.13 bm25s==0.2.13
...@@ -64,11 +84,18 @@ bounded-pool-executor==0.0.3 ...@@ -64,11 +84,18 @@ bounded-pool-executor==0.0.3
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
# via -r requirements/test.in # via -r requirements/test.in
cachetools==5.5.2 cachetools==5.5.2
# via google-auth # via
# google-auth
# mlflow-skinny
certifi==2024.8.30 certifi==2024.8.30
# via # via
# fiona
# httpcore # httpcore
# httpx # httpx
# lightly
# pyogrio
# pyproj
# rasterio
# requests # requests
cffi==1.17.1 cffi==1.17.1
# via soundfile # via soundfile
...@@ -79,11 +106,28 @@ charset-normalizer==3.4.0 ...@@ -79,11 +106,28 @@ charset-normalizer==3.4.0
click==8.1.7 click==8.1.7
# via # via
# black # black
# click-plugins
# cligj
# fiona
# flask
# jiwer # jiwer
# mlflow-skinny
# nltk # nltk
# rasterio
# ray # ray
# schemathesis # schemathesis
# typer # typer
# uvicorn
click-plugins==1.1.1.2
# via
# fiona
# rasterio
cligj==0.7.2
# via
# fiona
# rasterio
cloudpickle==3.1.1
# via mlflow-skinny
colorama==0.4.6 colorama==0.4.6
# via # via
# sacrebleu # sacrebleu
...@@ -99,6 +143,8 @@ cupy-cuda12x==13.3.0 ...@@ -99,6 +143,8 @@ cupy-cuda12x==13.3.0
# via ray # via ray
cycler==0.12.1 cycler==0.12.1
# via matplotlib # via matplotlib
databricks-sdk==0.59.0
# via mlflow-skinny
datamodel-code-generator==0.26.3 datamodel-code-generator==0.26.3
# via -r requirements/test.in # via -r requirements/test.in
dataproperty==1.0.1 dataproperty==1.0.1
...@@ -122,13 +168,21 @@ distlib==0.3.9 ...@@ -122,13 +168,21 @@ distlib==0.3.9
# via virtualenv # via virtualenv
dnspython==2.7.0 dnspython==2.7.0
# via email-validator # via email-validator
docker==7.1.0
# via mlflow
docopt==0.6.2 docopt==0.6.2
# via num2words # via num2words
einops==0.8.0 docstring-parser==0.17.0
# via jsonargparse
efficientnet-pytorch==0.7.1
# via segmentation-models-pytorch
einops==0.8.1
# via # via
# -r requirements/test.in # -r requirements/test.in
# encodec # encodec
# mamba-ssm # mamba-ssm
# terratorch
# torchgeo
# vector-quantize-pytorch # vector-quantize-pytorch
# vocos # vocos
einx==0.3.0 einx==0.3.0
...@@ -141,6 +195,8 @@ eval-type-backport==0.2.2 ...@@ -141,6 +195,8 @@ eval-type-backport==0.2.2
# via mteb # via mteb
evaluate==0.4.3 evaluate==0.4.3
# via lm-eval # via lm-eval
fastapi==0.116.1
# via mlflow-skinny
fastparquet==2024.11.0 fastparquet==2024.11.0
# via genai-perf # via genai-perf
fastrlock==0.8.2 fastrlock==0.8.2
...@@ -156,6 +212,10 @@ filelock==3.16.1 ...@@ -156,6 +212,10 @@ filelock==3.16.1
# torch # torch
# transformers # transformers
# virtualenv # virtualenv
fiona==1.10.1
# via torchgeo
flask==3.1.1
# via mlflow
fonttools==4.54.1 fonttools==4.54.1
# via matplotlib # via matplotlib
fqdn==1.5.1 fqdn==1.5.1
...@@ -173,23 +233,50 @@ fsspec==2024.9.0 ...@@ -173,23 +233,50 @@ fsspec==2024.9.0
# evaluate # evaluate
# fastparquet # fastparquet
# huggingface-hub # huggingface-hub
# lightning
# pytorch-lightning
# torch # torch
ftfy==6.3.1
# via open-clip-torch
genai-perf==0.0.8 genai-perf==0.0.8
# via -r requirements/test.in # via -r requirements/test.in
genson==1.3.0 genson==1.3.0
# via datamodel-code-generator # via datamodel-code-generator
geopandas==1.0.1
# via terratorch
gitdb==4.0.12
# via gitpython
gitpython==3.1.44
# via mlflow-skinny
google-api-core==2.24.2 google-api-core==2.24.2
# via opencensus # via opencensus
google-auth==2.40.2 google-auth==2.40.2
# via google-api-core # via
# databricks-sdk
# google-api-core
googleapis-common-protos==1.70.0 googleapis-common-protos==1.70.0
# via google-api-core # via google-api-core
graphene==3.4.3
# via mlflow
graphql-core==3.2.6 graphql-core==3.2.6
# via hypothesis-graphql # via
# graphene
# graphql-relay
# hypothesis-graphql
graphql-relay==3.2.0
# via graphene
greenlet==3.2.3
# via sqlalchemy
grpcio==1.71.0 grpcio==1.71.0
# via ray # via ray
gunicorn==23.0.0
# via mlflow
h11==0.14.0 h11==0.14.0
# via httpcore # via
# httpcore
# uvicorn
h5py==3.13.0
# via terratorch
harfile==0.3.0 harfile==0.3.0
# via schemathesis # via schemathesis
hf-xet==1.1.3 hf-xet==1.1.3
...@@ -202,20 +289,27 @@ httpx==0.27.2 ...@@ -202,20 +289,27 @@ httpx==0.27.2
# via # via
# -r requirements/test.in # -r requirements/test.in
# schemathesis # schemathesis
huggingface-hub==0.33.0 huggingface-hub==0.33.1
# via # via
# -r requirements/test.in # -r requirements/test.in
# accelerate # accelerate
# datasets # datasets
# evaluate # evaluate
# open-clip-torch
# peft # peft
# segmentation-models-pytorch
# sentence-transformers # sentence-transformers
# terratorch
# timm # timm
# tokenizers # tokenizers
# transformers # transformers
# vocos # vocos
humanize==4.11.0 humanize==4.11.0
# via runai-model-streamer # via runai-model-streamer
hydra-core==1.3.2
# via
# lightly
# lightning
hypothesis==6.131.0 hypothesis==6.131.0
# via # via
# hypothesis-graphql # hypothesis-graphql
...@@ -233,6 +327,14 @@ idna==3.10 ...@@ -233,6 +327,14 @@ idna==3.10
# jsonschema # jsonschema
# requests # requests
# yarl # yarl
imageio==2.37.0
# via scikit-image
importlib-metadata==8.7.0
# via
# mlflow-skinny
# opentelemetry-api
importlib-resources==6.5.2
# via typeshed-client
inflect==5.6.2 inflect==5.6.2
# via datamodel-code-generator # via datamodel-code-generator
iniconfig==2.0.0 iniconfig==2.0.0
...@@ -241,9 +343,13 @@ isoduration==20.11.0 ...@@ -241,9 +343,13 @@ isoduration==20.11.0
# via jsonschema # via jsonschema
isort==5.13.2 isort==5.13.2
# via datamodel-code-generator # via datamodel-code-generator
itsdangerous==2.2.0
# via flask
jinja2==3.1.6 jinja2==3.1.6
# via # via
# datamodel-code-generator # datamodel-code-generator
# flask
# mlflow
# torch # torch
jiwer==3.0.5 jiwer==3.0.5
# via -r requirements/test.in # via -r requirements/test.in
...@@ -256,6 +362,10 @@ joblib==1.4.2 ...@@ -256,6 +362,10 @@ joblib==1.4.2
# librosa # librosa
# nltk # nltk
# scikit-learn # scikit-learn
jsonargparse==4.35.0
# via
# lightning
# terratorch
jsonlines==4.0.0 jsonlines==4.0.0
# via lm-eval # via lm-eval
jsonpointer==3.0.0 jsonpointer==3.0.0
...@@ -274,12 +384,33 @@ kaleido==0.2.1 ...@@ -274,12 +384,33 @@ kaleido==0.2.1
# via genai-perf # via genai-perf
kiwisolver==1.4.7 kiwisolver==1.4.7
# via matplotlib # via matplotlib
kornia==0.8.1
# via torchgeo
kornia-rs==0.1.9
# via kornia
lazy-loader==0.4 lazy-loader==0.4
# via librosa # via
# librosa
# scikit-image
libnacl==2.1.0 libnacl==2.1.0
# via tensorizer # via tensorizer
librosa==0.10.2.post1 librosa==0.10.2.post1
# via -r requirements/test.in # via -r requirements/test.in
lightly==1.5.20
# via
# terratorch
# torchgeo
lightly-utils==0.0.2
# via lightly
lightning==2.5.1.post0
# via
# terratorch
# torchgeo
lightning-utilities==0.14.3
# via
# lightning
# pytorch-lightning
# torchmetrics
llvmlite==0.44.0 llvmlite==0.44.0
# via numba # via numba
lm-eval==0.4.8 lm-eval==0.4.8
...@@ -288,16 +419,27 @@ lxml==5.3.0 ...@@ -288,16 +419,27 @@ lxml==5.3.0
# via # via
# blobfile # blobfile
# sacrebleu # sacrebleu
mako==1.3.10
# via alembic
mamba-ssm==2.2.4 mamba-ssm==2.2.4
# via -r requirements/test.in # via -r requirements/test.in
markdown==3.8.2
# via mlflow
markdown-it-py==3.0.0 markdown-it-py==3.0.0
# via rich # via rich
markupsafe==3.0.1 markupsafe==3.0.1
# via # via
# flask
# jinja2 # jinja2
# mako
# werkzeug # werkzeug
matplotlib==3.9.2 matplotlib==3.9.2
# via -r requirements/test.in # via
# -r requirements/test.in
# lightning
# mlflow
# pycocotools
# torchgeo
mbstrdecoder==1.1.3 mbstrdecoder==1.1.3
# via # via
# dataproperty # dataproperty
...@@ -305,8 +447,12 @@ mbstrdecoder==1.1.3 ...@@ -305,8 +447,12 @@ mbstrdecoder==1.1.3
# typepy # typepy
mdurl==0.1.2 mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
mistral-common==1.6.2 mistral-common==1.8.2
# via -r requirements/test.in # via -r requirements/test.in
mlflow==2.22.0
# via terratorch
mlflow-skinny==2.22.0
# via mlflow
more-itertools==10.5.0 more-itertools==10.5.0
# via lm-eval # via lm-eval
mpmath==1.3.0 mpmath==1.3.0
...@@ -325,10 +471,14 @@ multiprocess==0.70.16 ...@@ -325,10 +471,14 @@ multiprocess==0.70.16
# via # via
# datasets # datasets
# evaluate # evaluate
munch==4.0.0
# via pretrainedmodels
mypy-extensions==1.0.0 mypy-extensions==1.0.0
# via black # via black
networkx==3.2.1 networkx==3.2.1
# via torch # via
# scikit-image
# torch
ninja==1.11.1.3 ninja==1.11.1.3
# via mamba-ssm # via mamba-ssm
nltk==3.9.1 nltk==3.9.1
...@@ -345,6 +495,8 @@ numpy==1.26.4 ...@@ -345,6 +495,8 @@ numpy==1.26.4
# via # via
# -r requirements/test.in # -r requirements/test.in
# accelerate # accelerate
# albucore
# albumentations
# bitsandbytes # bitsandbytes
# bm25s # bm25s
# contourpy # contourpy
...@@ -355,9 +507,15 @@ numpy==1.26.4 ...@@ -355,9 +507,15 @@ numpy==1.26.4
# evaluate # evaluate
# fastparquet # fastparquet
# genai-perf # genai-perf
# geopandas
# h5py
# imageio
# librosa # librosa
# lightly
# lightly-utils
# matplotlib # matplotlib
# mistral-common # mistral-common
# mlflow
# mteb # mteb
# numba # numba
# numexpr # numexpr
...@@ -365,18 +523,30 @@ numpy==1.26.4 ...@@ -365,18 +523,30 @@ numpy==1.26.4
# pandas # pandas
# patsy # patsy
# peft # peft
# pycocotools
# pyogrio
# rasterio
# rioxarray
# rouge-score # rouge-score
# runai-model-streamer # runai-model-streamer
# sacrebleu # sacrebleu
# scikit-image
# scikit-learn # scikit-learn
# scipy # scipy
# segmentation-models-pytorch
# shapely
# soxr # soxr
# statsmodels # statsmodels
# tensorboardx
# tensorizer # tensorizer
# tifffile
# torchgeo
# torchmetrics
# torchvision # torchvision
# transformers # transformers
# tritonclient # tritonclient
# vocos # vocos
# xarray
nvidia-cublas-cu12==12.8.3.14 nvidia-cublas-cu12==12.8.3.14
# via # via
# nvidia-cudnn-cu12 # nvidia-cudnn-cu12
...@@ -414,6 +584,12 @@ nvidia-nvjitlink-cu12==12.8.61 ...@@ -414,6 +584,12 @@ nvidia-nvjitlink-cu12==12.8.61
# torch # torch
nvidia-nvtx-cu12==12.8.55 nvidia-nvtx-cu12==12.8.55
# via torch # via torch
omegaconf==2.3.0
# via
# hydra-core
# lightning
open-clip-torch==2.32.0
# via -r requirements/test.in
opencensus==0.11.4 opencensus==0.11.4
# via ray # via ray
opencensus-context==0.1.3 opencensus-context==0.1.3
...@@ -421,7 +597,18 @@ opencensus-context==0.1.3 ...@@ -421,7 +597,18 @@ opencensus-context==0.1.3
opencv-python-headless==4.11.0.86 opencv-python-headless==4.11.0.86
# via # via
# -r requirements/test.in # -r requirements/test.in
# albucore
# albumentations
# mistral-common # mistral-common
opentelemetry-api==1.35.0
# via
# mlflow-skinny
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-sdk==1.35.0
# via mlflow-skinny
opentelemetry-semantic-conventions==0.56b0
# via opentelemetry-sdk
packaging==24.2 packaging==24.2
# via # via
# accelerate # accelerate
...@@ -430,26 +617,44 @@ packaging==24.2 ...@@ -430,26 +617,44 @@ packaging==24.2
# datasets # datasets
# evaluate # evaluate
# fastparquet # fastparquet
# geopandas
# gunicorn
# huggingface-hub # huggingface-hub
# hydra-core
# kornia
# lazy-loader # lazy-loader
# lightning
# lightning-utilities
# mamba-ssm # mamba-ssm
# matplotlib # matplotlib
# mlflow-skinny
# peft # peft
# plotly # plotly
# pooch # pooch
# pyogrio
# pytest # pytest
# pytest-rerunfailures # pytest-rerunfailures
# pytorch-lightning
# ray # ray
# rioxarray
# scikit-image
# statsmodels # statsmodels
# tensorboardx
# torchmetrics
# transformers # transformers
# typepy # typepy
# xarray
pandas==2.2.3 pandas==2.2.3
# via # via
# datasets # datasets
# evaluate # evaluate
# fastparquet # fastparquet
# genai-perf # genai-perf
# geopandas
# mlflow
# statsmodels # statsmodels
# torchgeo
# xarray
pathspec==0.12.1 pathspec==0.12.1
# via black # via black
pathvalidate==3.2.1 pathvalidate==3.2.1
...@@ -463,9 +668,14 @@ peft==0.13.2 ...@@ -463,9 +668,14 @@ peft==0.13.2
pillow==10.4.0 pillow==10.4.0
# via # via
# genai-perf # genai-perf
# imageio
# lightly-utils
# matplotlib # matplotlib
# mistral-common # mistral-common
# scikit-image
# segmentation-models-pytorch
# sentence-transformers # sentence-transformers
# torchgeo
# torchvision # torchvision
platformdirs==4.3.6 platformdirs==4.3.6
# via # via
...@@ -484,6 +694,8 @@ portalocker==2.10.1 ...@@ -484,6 +694,8 @@ portalocker==2.10.1
# via sacrebleu # via sacrebleu
pqdm==0.2.0 pqdm==0.2.0
# via -r requirements/test.in # via -r requirements/test.in
pretrainedmodels==0.7.4
# via segmentation-models-pytorch
prometheus-client==0.22.0 prometheus-client==0.22.0
# via ray # via ray
propcache==0.2.0 propcache==0.2.0
...@@ -494,8 +706,10 @@ protobuf==5.28.3 ...@@ -494,8 +706,10 @@ protobuf==5.28.3
# via # via
# google-api-core # google-api-core
# googleapis-common-protos # googleapis-common-protos
# mlflow-skinny
# proto-plus # proto-plus
# ray # ray
# tensorboardx
# tensorizer # tensorizer
psutil==6.1.0 psutil==6.1.0
# via # via
...@@ -510,6 +724,7 @@ pyarrow==18.0.0 ...@@ -510,6 +724,7 @@ pyarrow==18.0.0
# via # via
# datasets # datasets
# genai-perf # genai-perf
# mlflow
pyasn1==0.6.1 pyasn1==0.6.1
# via # via
# pyasn1-modules # pyasn1-modules
...@@ -518,6 +733,10 @@ pyasn1-modules==0.4.2 ...@@ -518,6 +733,10 @@ pyasn1-modules==0.4.2
# via google-auth # via google-auth
pybind11==2.13.6 pybind11==2.13.6
# via lm-eval # via lm-eval
pycocotools==2.0.8
# via terratorch
pycountry==24.6.1
# via pydantic-extra-types
pycparser==2.22 pycparser==2.22
# via cffi # via cffi
pycryptodomex==3.22.0 pycryptodomex==3.22.0
...@@ -525,23 +744,39 @@ pycryptodomex==3.22.0 ...@@ -525,23 +744,39 @@ pycryptodomex==3.22.0
pydantic==2.11.5 pydantic==2.11.5
# via # via
# -r requirements/test.in # -r requirements/test.in
# albumentations
# datamodel-code-generator # datamodel-code-generator
# fastapi
# lightly
# mistral-common # mistral-common
# mlflow-skinny
# mteb # mteb
# pydantic-extra-types
# ray # ray
pydantic-core==2.33.2 pydantic-core==2.33.2
# via pydantic # via pydantic
pydantic-extra-types==2.10.5
# via mistral-common
pygments==2.18.0 pygments==2.18.0
# via rich # via rich
pyogrio==0.11.0
# via geopandas
pyparsing==3.2.0 pyparsing==3.2.0
# via matplotlib # via
# matplotlib
# rasterio
pyproj==3.7.1
# via
# geopandas
# rioxarray
# torchgeo
pyrate-limiter==3.7.0 pyrate-limiter==3.7.0
# via schemathesis # via schemathesis
pystemmer==3.0.0 pystemmer==3.0.0
# via mteb # via mteb
pytablewriter==1.2.0 pytablewriter==1.2.0
# via lm-eval # via lm-eval
pytest==8.3.3 pytest==8.3.5
# via # via
# -r requirements/test.in # -r requirements/test.in
# buildkite-test-collector # buildkite-test-collector
...@@ -554,6 +789,7 @@ pytest==8.3.3 ...@@ -554,6 +789,7 @@ pytest==8.3.3
# pytest-subtests # pytest-subtests
# pytest-timeout # pytest-timeout
# schemathesis # schemathesis
# terratorch
pytest-asyncio==0.24.0 pytest-asyncio==0.24.0
# via -r requirements/test.in # via -r requirements/test.in
pytest-forked==1.6.0 pytest-forked==1.6.0
...@@ -568,15 +804,23 @@ pytest-subtests==0.14.1 ...@@ -568,15 +804,23 @@ pytest-subtests==0.14.1
# via schemathesis # via schemathesis
pytest-timeout==2.3.1 pytest-timeout==2.3.1
# via -r requirements/test.in # via -r requirements/test.in
python-box==7.3.2
# via terratorch
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
# via # via
# arrow # arrow
# botocore # botocore
# graphene
# lightly
# matplotlib # matplotlib
# pandas # pandas
# typepy # typepy
python-rapidjson==1.20 python-rapidjson==1.20
# via tritonclient # via tritonclient
pytorch-lightning==2.5.2
# via
# lightly
# lightning
pytrec-eval-terrier==0.5.7 pytrec-eval-terrier==0.5.7
# via mteb # via mteb
pytz==2024.2 pytz==2024.2
...@@ -586,11 +830,17 @@ pytz==2024.2 ...@@ -586,11 +830,17 @@ pytz==2024.2
pyyaml==6.0.2 pyyaml==6.0.2
# via # via
# accelerate # accelerate
# albumentations
# datamodel-code-generator # datamodel-code-generator
# datasets # datasets
# genai-perf # genai-perf
# huggingface-hub # huggingface-hub
# jsonargparse
# lightning
# mlflow-skinny
# omegaconf
# peft # peft
# pytorch-lightning
# ray # ray
# responses # responses
# schemathesis # schemathesis
...@@ -599,6 +849,11 @@ pyyaml==6.0.2 ...@@ -599,6 +849,11 @@ pyyaml==6.0.2
# vocos # vocos
rapidfuzz==3.12.1 rapidfuzz==3.12.1
# via jiwer # via jiwer
rasterio==1.4.3
# via
# rioxarray
# terratorch
# torchgeo
ray==2.43.0 ray==2.43.0
# via -r requirements/test.in # via -r requirements/test.in
redis==5.2.0 redis==5.2.0
...@@ -610,18 +865,23 @@ referencing==0.35.1 ...@@ -610,18 +865,23 @@ referencing==0.35.1
regex==2024.9.11 regex==2024.9.11
# via # via
# nltk # nltk
# open-clip-torch
# sacrebleu # sacrebleu
# tiktoken # tiktoken
# transformers # transformers
requests==2.32.3 requests==2.32.3
# via # via
# buildkite-test-collector # buildkite-test-collector
# databricks-sdk
# datasets # datasets
# docker
# evaluate # evaluate
# google-api-core # google-api-core
# huggingface-hub # huggingface-hub
# lightly
# lm-eval # lm-eval
# mistral-common # mistral-common
# mlflow-skinny
# mteb # mteb
# pooch # pooch
# ray # ray
...@@ -639,8 +899,11 @@ rfc3987==1.3.8 ...@@ -639,8 +899,11 @@ rfc3987==1.3.8
rich==13.9.4 rich==13.9.4
# via # via
# genai-perf # genai-perf
# lightning
# mteb # mteb
# typer # typer
rioxarray==0.19.0
# via terratorch
rouge-score==0.1.2 rouge-score==0.1.2
# via lm-eval # via lm-eval
rpds-py==0.20.1 rpds-py==0.20.1
...@@ -649,6 +912,8 @@ rpds-py==0.20.1 ...@@ -649,6 +912,8 @@ rpds-py==0.20.1
# referencing # referencing
rsa==4.9.1 rsa==4.9.1
# via google-auth # via google-auth
rtree==1.4.0
# via torchgeo
runai-model-streamer==0.11.0 runai-model-streamer==0.11.0
# via -r requirements/test.in # via -r requirements/test.in
runai-model-streamer-s3==0.11.0 runai-model-streamer-s3==0.11.0
...@@ -660,26 +925,38 @@ sacrebleu==2.4.3 ...@@ -660,26 +925,38 @@ sacrebleu==2.4.3
safetensors==0.4.5 safetensors==0.4.5
# via # via
# accelerate # accelerate
# open-clip-torch
# peft # peft
# timm # timm
# transformers # transformers
schemathesis==3.39.15 schemathesis==3.39.15
# via -r requirements/test.in # via -r requirements/test.in
scikit-image==0.25.2
# via albumentations
scikit-learn==1.5.2 scikit-learn==1.5.2
# via # via
# albumentations
# librosa # librosa
# lm-eval # lm-eval
# mlflow
# mteb # mteb
# sentence-transformers # sentence-transformers
scipy==1.13.1 scipy==1.13.1
# via # via
# albumentations
# bm25s # bm25s
# librosa # librosa
# mlflow
# mteb # mteb
# scikit-image
# scikit-learn # scikit-learn
# sentence-transformers # sentence-transformers
# statsmodels # statsmodels
# vocos # vocos
segmentation-models-pytorch==0.4.0
# via
# terratorch
# torchgeo
sentence-transformers==3.2.1 sentence-transformers==3.2.1
# via # via
# -r requirements/test.in # -r requirements/test.in
...@@ -688,21 +965,30 @@ sentencepiece==0.2.0 ...@@ -688,21 +965,30 @@ sentencepiece==0.2.0
# via mistral-common # via mistral-common
setuptools==77.0.3 setuptools==77.0.3
# via # via
# lightning-utilities
# mamba-ssm # mamba-ssm
# pytablewriter # pytablewriter
# torch # torch
# triton # triton
shapely==2.1.1
# via
# geopandas
# torchgeo
shellingham==1.5.4 shellingham==1.5.4
# via typer # via typer
six==1.16.0 six==1.16.0
# via # via
# junit-xml # junit-xml
# lightly
# opencensus # opencensus
# python-dateutil # python-dateutil
# rfc3339-validator # rfc3339-validator
# rouge-score # rouge-score
# segmentation-models-pytorch
smart-open==7.1.0 smart-open==7.1.0
# via ray # via ray
smmap==5.0.2
# via gitdb
sniffio==1.3.1 sniffio==1.3.1
# via # via
# anyio # anyio
...@@ -713,12 +999,22 @@ soundfile==0.12.1 ...@@ -713,12 +999,22 @@ soundfile==0.12.1
# via # via
# -r requirements/test.in # -r requirements/test.in
# librosa # librosa
# mistral-common
soxr==0.5.0.post1 soxr==0.5.0.post1
# via librosa # via
# librosa
# mistral-common
sqlalchemy==2.0.41
# via
# alembic
# mlflow
sqlitedict==2.1.0 sqlitedict==2.1.0
# via lm-eval # via lm-eval
sqlparse==0.5.3
# via mlflow-skinny
starlette==0.46.2 starlette==0.46.2
# via # via
# fastapi
# schemathesis # schemathesis
# starlette-testclient # starlette-testclient
starlette-testclient==0.4.1 starlette-testclient==0.4.1
...@@ -739,16 +1035,29 @@ tenacity==9.0.0 ...@@ -739,16 +1035,29 @@ tenacity==9.0.0
# via # via
# lm-eval # lm-eval
# plotly # plotly
tensorizer==2.9.0 tensorboardx==2.6.4
# via lightning
tensorizer==2.10.1
# via -r requirements/test.in
terratorch==1.1rc2
# via -r requirements/test.in # via -r requirements/test.in
threadpoolctl==3.5.0 threadpoolctl==3.5.0
# via scikit-learn # via scikit-learn
tifffile==2025.3.30
# via
# scikit-image
# terratorch
tiktoken==0.7.0 tiktoken==0.7.0
# via # via
# lm-eval # lm-eval
# mistral-common # mistral-common
timm==1.0.11 timm==1.0.15
# via -r requirements/test.in # via
# -r requirements/test.in
# open-clip-torch
# segmentation-models-pytorch
# terratorch
# torchgeo
tokenizers==0.21.1 tokenizers==0.21.1
# via # via
# -r requirements/test.in # -r requirements/test.in
...@@ -757,50 +1066,81 @@ tomli==2.2.1 ...@@ -757,50 +1066,81 @@ tomli==2.2.1
# via schemathesis # via schemathesis
tomli-w==1.2.0 tomli-w==1.2.0
# via schemathesis # via schemathesis
torch==2.7.0+cu128 torch==2.7.1+cu128
# via # via
# -r requirements/test.in # -r requirements/test.in
# accelerate # accelerate
# bitsandbytes # bitsandbytes
# efficientnet-pytorch
# encodec # encodec
# fastsafetensors # fastsafetensors
# kornia
# lightly
# lightning
# lm-eval # lm-eval
# mamba-ssm # mamba-ssm
# mteb # mteb
# open-clip-torch
# peft # peft
# pretrainedmodels
# pytorch-lightning
# runai-model-streamer # runai-model-streamer
# segmentation-models-pytorch
# sentence-transformers # sentence-transformers
# tensorizer # tensorizer
# terratorch
# timm # timm
# torchaudio # torchaudio
# torchgeo
# torchmetrics
# torchvision # torchvision
# vector-quantize-pytorch # vector-quantize-pytorch
# vocos # vocos
torchaudio==2.7.0+cu128 torchaudio==2.7.1+cu128
# via # via
# -r requirements/test.in # -r requirements/test.in
# encodec # encodec
# vocos # vocos
torchvision==0.22.0+cu128 torchgeo==0.7.0
# via terratorch
torchmetrics==1.7.4
# via
# lightning
# pytorch-lightning
# terratorch
# torchgeo
torchvision==0.22.1+cu128
# via # via
# -r requirements/test.in # -r requirements/test.in
# lightly
# open-clip-torch
# pretrainedmodels
# segmentation-models-pytorch
# terratorch
# timm # timm
# torchgeo
tqdm==4.66.6 tqdm==4.66.6
# via # via
# datasets # datasets
# evaluate # evaluate
# huggingface-hub # huggingface-hub
# lightly
# lightning
# lm-eval # lm-eval
# mteb # mteb
# nltk # nltk
# open-clip-torch
# peft # peft
# pqdm # pqdm
# pretrainedmodels
# pytorch-lightning
# segmentation-models-pytorch
# sentence-transformers # sentence-transformers
# tqdm-multiprocess # tqdm-multiprocess
# transformers # transformers
tqdm-multiprocess==0.0.11 tqdm-multiprocess==0.0.11
# via lm-eval # via lm-eval
transformers==4.52.4 transformers==4.53.2
# via # via
# -r requirements/test.in # -r requirements/test.in
# genai-perf # genai-perf
...@@ -811,7 +1151,7 @@ transformers==4.52.4 ...@@ -811,7 +1151,7 @@ transformers==4.52.4
# transformers-stream-generator # transformers-stream-generator
transformers-stream-generator==0.0.5 transformers-stream-generator==0.0.5
# via -r requirements/test.in # via -r requirements/test.in
triton==3.3.0 triton==3.3.1
# via torch # via torch
tritonclient==2.51.0 tritonclient==2.51.0
# via # via
...@@ -826,17 +1166,34 @@ typer==0.15.2 ...@@ -826,17 +1166,34 @@ typer==0.15.2
# via fastsafetensors # via fastsafetensors
types-python-dateutil==2.9.0.20241206 types-python-dateutil==2.9.0.20241206
# via arrow # via arrow
typeshed-client==2.8.2
# via jsonargparse
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# albumentations
# alembic
# fastapi
# graphene
# huggingface-hub # huggingface-hub
# librosa # librosa
# lightning
# lightning-utilities
# mistral-common # mistral-common
# mlflow-skinny
# mteb # mteb
# opentelemetry-api
# opentelemetry-sdk
# opentelemetry-semantic-conventions
# pqdm # pqdm
# pydantic # pydantic
# pydantic-core # pydantic-core
# pydantic-extra-types
# pytorch-lightning
# sqlalchemy
# torch # torch
# torchgeo
# typer # typer
# typeshed-client
# typing-inspection # typing-inspection
typing-inspection==0.4.1 typing-inspection==0.4.1
# via pydantic # via pydantic
...@@ -848,23 +1205,33 @@ urllib3==2.2.3 ...@@ -848,23 +1205,33 @@ urllib3==2.2.3
# via # via
# blobfile # blobfile
# botocore # botocore
# docker
# lightly
# requests # requests
# responses # responses
# tritonclient # tritonclient
uvicorn==0.35.0
# via mlflow-skinny
vector-quantize-pytorch==1.21.2 vector-quantize-pytorch==1.21.2
# via -r requirements/test.in # via -r requirements/test.in
virtualenv==20.31.2 virtualenv==20.31.2
# via ray # via ray
vocos==0.1.0 vocos==0.1.0
# via -r requirements/test.in # via -r requirements/test.in
wcwidth==0.2.13
# via ftfy
webcolors==24.11.1 webcolors==24.11.1
# via jsonschema # via jsonschema
werkzeug==3.1.3 werkzeug==3.1.3
# via schemathesis # via
# flask
# schemathesis
word2number==1.1 word2number==1.1
# via lm-eval # via lm-eval
wrapt==1.17.2 wrapt==1.17.2
# via smart-open # via smart-open
xarray==2025.7.1
# via rioxarray
xxhash==3.5.0 xxhash==3.5.0
# via # via
# datasets # datasets
...@@ -873,5 +1240,7 @@ yarl==1.17.1 ...@@ -873,5 +1240,7 @@ yarl==1.17.1
# via # via
# aiohttp # aiohttp
# schemathesis # schemathesis
zipp==3.23.0
# via importlib-metadata
zstandard==0.23.0 zstandard==0.23.0
# via lm-eval # via lm-eval
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment