Commit 711aa9d5 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.10.0' into v0.10.0-dev

parents 751c492c 6d8d0a24
#!/bin/bash
HOST="0.0.0.0"
PORT=8006
DATA_PARALLEL_SIZE=4
REDUNDANT_EXPERTS=0
LOCAL_MODEL_PATH="/models/models--deepseek-ai--DeepSeek-V2-Lite/snapshots/604d5664dddd88a0433dbae533b7fe9472482de0"
MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite"
while [[ $# -gt 0 ]]; do
case $1 in
--dp)
DATA_PARALLEL_SIZE="$2"
shift 2
;;
--re)
REDUNDANT_EXPERTS="$2"
shift 2
;;
--host)
HOST="$2"
shift 2
;;
--port)
PORT="$2"
shift 2
;;
--model)
MODEL_NAME="$2"
shift 2
;;
--local-model)
MODEL_NAME=$LOCAL_MODEL_PATH
shift
;;
-h|--help)
echo "Usage: $0 [OPTIONS]"
echo "Options:"
echo " --dp SIZE Set data parallel size (default: 4)"
echo " --re SIZE Set redundant experts (default: 0)"
echo " --host HOST Set host address (default: 0.0.0.0)"
echo " --port PORT Set port number (default: 8006)"
echo " --model MODEL_NAME Set model name or path"
echo " -h, --help Show this help message"
exit 0
;;
*)
echo "Unknown option: $1"
echo "Use -h or --help for usage information"
exit 1
;;
esac
done
echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALLEL_SIZE and redundant experts: $REDUNDANT_EXPERTS"
export RAY_DEDUP_LOGS=0
export VLLM_USE_V1=1
export VLLM_ALL2ALL_BACKEND="pplx"
export VLLM_USE_DEEP_GEMM=1
vllm serve $MODEL_NAME \
--data-parallel-size $DATA_PARALLEL_SIZE \
--data-parallel-size-local $DATA_PARALLEL_SIZE \
--data-parallel-backend ray \
--enforce-eager \
--enable-expert-parallel \
--enable-eplb \
--num-redundant-experts $REDUNDANT_EXPERTS \
--trust-remote-code \
--host $HOST \
--port $PORT
#!/bin/bash
#
# Helper script to manually start or join a Ray cluster for online serving of vLLM models.
# This script is first executed on the head node, and then on each worker node with the IP address
# of the head node.
#
# Subcommands:
# leader: Launches a Ray head node and blocks until the cluster reaches the expected size (head + workers).
# worker: Starts a worker node that connects to an existing Ray head node.
#
# Example usage:
# On the head node machine, start the Ray head node process and run a vLLM server.
# ./multi-node-serving.sh leader --ray_port=6379 --ray_cluster_size=<SIZE> [<extra ray args>] && \
# python3 -m vllm.entrypoints.openai.api_server --port 8080 --model meta-llama/Meta-Llama-3.1-405B-Instruct --tensor-parallel-size 8 --pipeline_parallel_size 2
#
# On each worker node, start the Ray worker node process.
# ./multi-node-serving.sh worker --ray_address=<HEAD_NODE_IP> --ray_port=6379 [<extra ray args>]
#
# About Ray:
# Ray is an open-source distributed execution framework that simplifies
# distributed computing. Learn more:
# https://ray.io/
subcommand=$1
shift
ray_port=6379
ray_init_timeout=300
declare -a start_params
subcommand=$1 # Either "leader" or "worker".
shift # Remove the subcommand from the argument list.
ray_port=6379 # Port used by the Ray head node.
ray_init_timeout=300 # Seconds to wait before timing out.
declare -a start_params # Parameters forwarded to the underlying 'ray start' command.
# Handle the worker subcommand.
case "$subcommand" in
worker)
ray_address=""
......@@ -32,6 +55,7 @@ case "$subcommand" in
exit 1
fi
# Retry until the worker node connects to the head node or the timeout expires.
for (( i=0; i < $ray_init_timeout; i+=5 )); do
ray start --address=$ray_address:$ray_port --block "${start_params[@]}"
if [ $? -eq 0 ]; then
......@@ -45,6 +69,7 @@ case "$subcommand" in
exit 1
;;
# Handle the leader subcommand.
leader)
ray_cluster_size=""
while [ $# -gt 0 ]; do
......@@ -69,10 +94,10 @@ case "$subcommand" in
exit 1
fi
# start the ray daemon
# Start the Ray head node.
ray start --head --port=$ray_port "${start_params[@]}"
# wait until all workers are active
# Poll Ray until every worker node is active.
for (( i=0; i < $ray_init_timeout; i+=5 )); do
active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'`
if [ $active_nodes -eq $ray_cluster_size ]; then
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Example online usage of Score API.
Run `vllm serve <model> --task score` to start up the server in vLLM.
"""
import argparse
import pprint
import requests
def post_http_request(prompt: dict, api_url: str) -> requests.Response:
headers = {"User-Agent": "Test Client"}
response = requests.post(api_url, headers=headers, json=prompt)
return response
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="localhost")
parser.add_argument("--port", type=int, default=8000)
parser.add_argument("--model", type=str, default="jinaai/jina-reranker-m0")
return parser.parse_args()
def main(args):
api_url = f"http://{args.host}:{args.port}/score"
model_name = args.model
text_1 = "slm markdown"
text_2 = {
"content": [
{
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png"
},
},
{
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
},
},
]
}
prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
score_response = post_http_request(prompt=prompt, api_url=api_url)
print("\nPrompt when text_1 is string and text_2 is a image list:")
pprint.pprint(prompt)
print("\nScore Response:")
pprint.pprint(score_response.json())
if __name__ == "__main__":
args = parse_args()
main(args)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
See more details at:
https://docs.ray.io/en/latest/serve/tutorials/serve-deepseek.html
And see Ray Serve LLM documentation at:
https://docs.ray.io/en/latest/serve/llm/serving-llms.html
Deploy DeepSeek R1 or V3 with Ray Serve LLM.
Ray Serve LLM is a scalable and production-grade model serving library built
on the Ray distributed computing framework and first-class support for the vLLM engine.
Key features:
- Automatic scaling, back-pressure, and load balancing across a Ray cluster.
- Unified multi-node multi-model deployment.
- Exposes an OpenAI-compatible HTTP API.
- Multi-LoRA support with shared base models.
Run `python3 ray_serve_deepseek.py` to deploy the model.
Run `python3 ray_serve_deepseek.py` to launch an endpoint.
Learn more in the official Ray Serve LLM documentation:
https://docs.ray.io/en/latest/serve/llm/serving-llms.html
"""
from ray import serve
......@@ -16,9 +24,8 @@ from ray.serve.llm import LLMConfig, build_openai_app
llm_config = LLMConfig(
model_loading_config={
"model_id": "deepseek",
# Since DeepSeek model is huge, it is recommended to pre-download
# the model to local disk, say /path/to/the/model and specify:
# model_source="/path/to/the/model"
# Pre-downloading the model to local storage is recommended since
# the model is large. Set model_source="/path/to/the/model".
"model_source": "deepseek-ai/DeepSeek-R1",
},
deployment_config={
......@@ -27,10 +34,10 @@ llm_config = LLMConfig(
"max_replicas": 1,
}
},
# Change to the accelerator type of the node
# Set to the node's accelerator type.
accelerator_type="H100",
runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
# Customize engine arguments as needed (e.g. vLLM engine kwargs)
# Customize engine arguments as required (for example, vLLM engine kwargs).
engine_kwargs={
"tensor_parallel_size": 8,
"pipeline_parallel_size": 2,
......@@ -44,6 +51,6 @@ llm_config = LLMConfig(
},
)
# Deploy the application
# Deploy the application.
llm_app = build_openai_app({"llm_configs": [llm_config]})
serve.run(llm_app)
#!/bin/bash
#
# Launch a Ray cluster inside Docker for vLLM inference.
#
# This script can start either a head node or a worker node, depending on the
# --head or --worker flag provided as the third positional argument.
#
# Usage:
# 1. Designate one machine as the head node and execute:
# bash run_cluster.sh \
# vllm/vllm-openai \
# <head_node_ip> \
# --head \
# /abs/path/to/huggingface/cache \
# -e VLLM_HOST_IP=<head_node_ip>
#
# 2. On every worker machine, execute:
# bash run_cluster.sh \
# vllm/vllm-openai \
# <head_node_ip> \
# --worker \
# /abs/path/to/huggingface/cache \
# -e VLLM_HOST_IP=<worker_node_ip>
#
# Each worker requires a unique VLLM_HOST_IP value.
# Keep each terminal session open. Closing a session stops the associated Ray
# node and thereby shuts down the entire cluster.
# Every machine must be reachable at the supplied IP address.
#
# The container is named "node-<random_suffix>". To open a shell inside
# a container after launch, use:
# docker exec -it node-<random_suffix> /bin/bash
#
# Then, you can execute vLLM commands on the Ray cluster as if it were a
# single machine, e.g. vllm serve ...
#
# To stop the container, use:
# docker stop node-<random_suffix>
# Check for minimum number of required arguments
# Check for minimum number of required arguments.
if [ $# -lt 4 ]; then
echo "Usage: $0 docker_image head_node_address --head|--worker path_to_hf_home [additional_args...]"
echo "Usage: $0 docker_image head_node_ip --head|--worker path_to_hf_home [additional_args...]"
exit 1
fi
# Assign the first three arguments and shift them away
# Extract the mandatory positional arguments and remove them from $@.
DOCKER_IMAGE="$1"
HEAD_NODE_ADDRESS="$2"
NODE_TYPE="$3" # Should be --head or --worker
NODE_TYPE="$3" # Should be --head or --worker.
PATH_TO_HF_HOME="$4"
shift 4
# Additional arguments are passed directly to the Docker command
# Preserve any extra arguments so they can be forwarded to Docker.
ADDITIONAL_ARGS=("$@")
# Validate node type
# Validate the NODE_TYPE argument.
if [ "${NODE_TYPE}" != "--head" ] && [ "${NODE_TYPE}" != "--worker" ]; then
echo "Error: Node type must be --head or --worker"
exit 1
fi
# Define a function to cleanup on EXIT signal
# Generate a unique container name with random suffix.
# Docker container names must be unique on each host.
# The random suffix allows multiple Ray containers to run simultaneously on the same machine,
# for example, on a multi-GPU machine.
CONTAINER_NAME="node-${RANDOM}"
# Define a cleanup routine that removes the container when the script exits.
# This prevents orphaned containers from accumulating if the script is interrupted.
cleanup() {
docker stop node
docker rm node
docker stop "${CONTAINER_NAME}"
docker rm "${CONTAINER_NAME}"
}
trap cleanup EXIT
# Command setup for head or worker node
# Build the Ray start command based on the node role.
# The head node manages the cluster and accepts connections on port 6379,
# while workers connect to the head's address.
RAY_START_CMD="ray start --block"
if [ "${NODE_TYPE}" == "--head" ]; then
RAY_START_CMD+=" --head --port=6379"
......@@ -37,11 +83,15 @@ else
RAY_START_CMD+=" --address=${HEAD_NODE_ADDRESS}:6379"
fi
# Run the docker command with the user specified parameters and additional arguments
# Launch the container with the assembled parameters.
# --network host: Allows Ray nodes to communicate directly via host networking
# --shm-size 10.24g: Increases shared memory
# --gpus all: Gives container access to all GPUs on the host
# -v HF_HOME: Mounts HuggingFace cache to avoid re-downloading models
docker run \
--entrypoint /bin/bash \
--network host \
--name node \
--name "${CONTAINER_NAME}" \
--shm-size 10.24g \
--gpus all \
-v "${PATH_TO_HF_HOME}:/root/.cache/huggingface" \
......
......@@ -4,6 +4,7 @@
import argparse
import dataclasses
import json
import logging
import os
import uuid
......@@ -15,9 +16,13 @@ from vllm.model_executor.model_loader.tensorizer import (
TensorizerConfig,
tensorize_lora_adapter,
tensorize_vllm_model,
tensorizer_kwargs_arg,
)
from vllm.utils import FlexibleArgumentParser
logger = logging.getLogger()
# yapf conflicts with isort for this docstring
# yapf: disable
"""
......@@ -119,7 +124,7 @@ vllm serve <model_path> \
"""
def parse_args():
def get_parser():
parser = FlexibleArgumentParser(
description="An example script that can be used to serialize and "
"deserialize vLLM models. These models "
......@@ -135,13 +140,13 @@ def parse_args():
required=False,
help="Path to a LoRA adapter to "
"serialize along with model tensors. This can then be deserialized "
"along with the model by passing a tensorizer_config kwarg to "
"LoRARequest with type TensorizerConfig. See the docstring for this "
"for a usage example."
"along with the model by instantiating a TensorizerConfig object, "
"creating a dict from it with TensorizerConfig.to_serializable(), "
"and passing it to LoRARequest's initializer with the kwarg "
"tensorizer_config_dict."
)
subparsers = parser.add_subparsers(dest='command')
subparsers = parser.add_subparsers(dest='command', required=True)
serialize_parser = subparsers.add_parser(
'serialize', help="Serialize a model to `--serialized-directory`")
......@@ -171,6 +176,14 @@ def parse_args():
"where `suffix` is given by `--suffix` or a random UUID if not "
"provided.")
serialize_parser.add_argument(
"--serialization-kwargs",
type=tensorizer_kwargs_arg,
required=False,
help=("A JSON string containing additional keyword arguments to "
"pass to Tensorizer's TensorSerializer during "
"serialization."))
serialize_parser.add_argument(
"--keyfile",
type=str,
......@@ -186,9 +199,17 @@ def parse_args():
deserialize_parser.add_argument(
"--path-to-tensors",
type=str,
required=True,
required=False,
help="The local path or S3 URI to the model tensors to deserialize. ")
deserialize_parser.add_argument(
"--serialized-directory",
type=str,
required=False,
help="Directory with model artifacts for loading. Assumes a "
"model.tensors file exists therein. Can supersede "
"--path-to-tensors.")
deserialize_parser.add_argument(
"--keyfile",
type=str,
......@@ -196,11 +217,27 @@ def parse_args():
help=("Path to a binary key to use to decrypt the model weights,"
" if the model was serialized with encryption"))
TensorizerArgs.add_cli_args(deserialize_parser)
deserialize_parser.add_argument(
"--deserialization-kwargs",
type=tensorizer_kwargs_arg,
required=False,
help=("A JSON string containing additional keyword arguments to "
"pass to Tensorizer's `TensorDeserializer` during "
"deserialization."))
return parser.parse_args()
TensorizerArgs.add_cli_args(deserialize_parser)
return parser
def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
cfg: TensorizerConfig):
for k, v in extra_cfg.items():
if hasattr(cfg, k):
setattr(cfg, k, v)
logger.info(
"Updating TensorizerConfig with %s from "
"--model-loader-extra-config provided", k
)
def deserialize(args, tensorizer_config):
if args.lora_path:
......@@ -230,7 +267,8 @@ def deserialize(args, tensorizer_config):
lora_request=LoRARequest("sql-lora",
1,
args.lora_path,
tensorizer_config = tensorizer_config)
tensorizer_config_dict = tensorizer_config
.to_serializable())
)
)
else:
......@@ -243,7 +281,8 @@ def deserialize(args, tensorizer_config):
def main():
args = parse_args()
parser = get_parser()
args = parser.parse_args()
s3_access_key_id = (getattr(args, 's3_access_key_id', None)
or os.environ.get("S3_ACCESS_KEY_ID", None))
......@@ -265,13 +304,24 @@ def main():
else:
keyfile = None
extra_config = {}
if args.model_loader_extra_config:
config = json.loads(args.model_loader_extra_config)
tensorizer_args = \
TensorizerConfig(**config)._construct_tensorizer_args()
tensorizer_args.tensorizer_uri = args.path_to_tensors
else:
tensorizer_args = None
extra_config = json.loads(args.model_loader_extra_config)
tensorizer_dir = (args.serialized_directory or
extra_config.get("tensorizer_dir"))
tensorizer_uri = (getattr(args, "path_to_tensors", None)
or extra_config.get("tensorizer_uri"))
if tensorizer_dir and tensorizer_uri:
parser.error("--serialized-directory and --path-to-tensors "
"cannot both be provided")
if not tensorizer_dir and not tensorizer_uri:
parser.error("Either --serialized-directory or --path-to-tensors "
"must be provided")
if args.command == "serialize":
eng_args_dict = {f.name: getattr(args, f.name) for f in
......@@ -281,7 +331,7 @@ def main():
argparse.Namespace(**eng_args_dict)
)
input_dir = args.serialized_directory.rstrip('/')
input_dir = tensorizer_dir.rstrip('/')
suffix = args.suffix if args.suffix else uuid.uuid4().hex
base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
if engine_args.tensor_parallel_size > 1:
......@@ -292,21 +342,29 @@ def main():
tensorizer_config = TensorizerConfig(
tensorizer_uri=model_path,
encryption_keyfile=keyfile,
**credentials)
serialization_kwargs=args.serialization_kwargs or {},
**credentials
)
if args.lora_path:
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
tensorize_lora_adapter(args.lora_path, tensorizer_config)
merge_extra_config_with_tensorizer_config(extra_config,
tensorizer_config)
tensorize_vllm_model(engine_args, tensorizer_config)
elif args.command == "deserialize":
if not tensorizer_args:
tensorizer_config = TensorizerConfig(
tensorizer_uri=args.path_to_tensors,
encryption_keyfile = keyfile,
**credentials
)
tensorizer_config = TensorizerConfig(
tensorizer_uri=args.path_to_tensors,
tensorizer_dir=args.serialized_directory,
encryption_keyfile=keyfile,
deserialization_kwargs=args.deserialization_kwargs or {},
**credentials
)
merge_extra_config_with_tensorizer_config(extra_config,
tensorizer_config)
deserialize(args, tensorizer_config)
else:
raise ValueError("Either serialize or deserialize must be specified.")
......
......@@ -11,7 +11,7 @@
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- endfor -%}
{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
{% if tools is defined and tools is not none %}
......@@ -27,8 +27,8 @@
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %}
{{ bos_token }}
{{ ns.system_prompt }}
{{- bos_token }}
{{- ns.system_prompt }}
{%- for message in messages %}
{% set content = message['content'] %}
{%- if message['role'] == 'user' %}
......@@ -45,7 +45,7 @@
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{'<|tool▁outputs▁end|>'}}
{{- '<|tool▁outputs▁end|>'}}
{%- endif %}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
......@@ -53,40 +53,40 @@
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if content is none %}
{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{{- '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- else %}
{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{{- content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{{- '\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %}
{%- endfor %}
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>'}}
{%- endif %}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}
{{- '<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{{content + '<|end▁of▁sentence|>'}}
{{- content + '<|end▁of▁sentence|>'}}
{%- endif %}
{%- endif %}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{%- if ns.is_output_first %}
{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{{- '<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{%- set ns.is_output_first = false %}
{%- else %}
{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{{- '\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{%- endif %}
{%- endif %}
{%- endfor -%}
{% if ns.is_tool %}
{{'<|tool▁outputs▁end|>'}}
{% endif %}
{{- '<|tool▁outputs▁end|>'}}
{%- endif %}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
{{'<|Assistant|>'}}
{% endif %}
{{- '<|Assistant|>'}}
{%- endif %}
\ No newline at end of file
{% set loop_messages = messages %}
{% if tools %}
{% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}
{% set weekday_cn = weekday_map[strftime_now('%A')] %}
{% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}
{% set datetime_str = datetime_str + ' ' + weekday_cn %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的。
如果没有一个函数可以使用,请直接使用自然语言回复用户,以助手:开头。
如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息,以助手:开头。
如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户,以助手:开头。
你应该只在工具调用部分返回函数调用。如果你决定调用任何函数,你必须将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表,格式为JSON。
' %}
{% set content_tmp = content_tmp + '
' + tools | tojson + '
' %}
{% if message['role'] == 'system' %}
{% set content_tmp = content_tmp + '
额外要求:
' + content + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。如果额外要求里有格式要求,请忽略,以此处为准。
否则,请参考开头说的三种情况,以助手:开头进行回复。
如果额外要求里有时间信息,就以额外要求里的时间为准,否则,参考当前时间:' + datetime_str %}
{% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}
{% elif message['role'] == 'user' %}
{% set content_tmp = content_tmp + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。
否则,请参考开头说的三种情况,以助手:开头进行回复。
当前时间:' + datetime_str %}
{% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}
{% set content = content_tmp + '用户:' + content + '<|extra_0|>' %}
{% endif %}
{% else %}
{% if message['role'] == 'user' %}
{% set content = '用户:' + content + '<|extra_0|>' %}
{% elif message['role'] == 'assistant' %}
{% if 'tool_calls' in message %}
{% set tool_calls = message['tool_calls'] %}
{% set ns = namespace(tool_calls="[") %}
{% for tool_call in tool_calls %}
{% set function = tool_call['function'] %}
{% set name = function['name'] %}
{% set ns.tool_calls = ns.tool_calls + '{"name": "' + name + '", '%}
{% set arguments = function['arguments'] %}
{% if arguments is not string %}
{% set arguments = arguments | tojson %}
{% endif %}
{% set ns.tool_calls = ns.tool_calls + '"arguments": ' + arguments + '}' %}
{% if not loop.last %}
{% set ns.tool_calls = ns.tool_calls + ', '%}
{% endif %}
{% endfor %}
{% set ns.tool_calls = ns.tool_calls + ']' %}
{% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}
{% else %}
{% set content = '助手:' + content %}
{% endif %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{% if content is not string %}
{set content = content | tojson }
{% endif %}
{% set content = '<tool_response>' + content + '</tool_response>' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{% endif %}
{{- content -}}
{% endfor %}
{% else %}
{% set context = {'has_head': true} %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% if content == '' %}
{% set _ = context.update({'has_head': false}) %}
{% elif message['role'] == 'system' %}
{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}
{% endif %}
{% endif %}
{% if message['role'] == 'user' %}
{% if loop.index0 == 1 and not context.has_head %}
{% set content = '<|startoftext|>' + content %}
{% endif %}
{% if loop.index0 == 1 and context.has_head %}
{% set content = content + '<|extra_0|>' %}
{% else %}
{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}
{% endif %}
{% elif message['role'] == 'assistant' %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{{- content -}}
{% endfor %}
{% endif %}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n' }}
{%- endif %}
......@@ -3,6 +3,7 @@ site_url: https://docs.vllm.ai
repo_url: https://github.com/vllm-project/vllm
edit_uri: edit/main/docs/
exclude_docs: |
argparse
*.inc.md
*.template.md
theme:
......@@ -47,6 +48,7 @@ theme:
hooks:
- docs/mkdocs/hooks/remove_announcement.py
- docs/mkdocs/hooks/generate_examples.py
- docs/mkdocs/hooks/generate_argparse.py
- docs/mkdocs/hooks/url_schemes.py
# Required to stop api-autonav from raising an error
......@@ -59,6 +61,7 @@ plugins:
- search
- autorefs
- awesome-nav
- glightbox
# For API reference generation
- api-autonav:
modules: ["vllm"]
......
......@@ -6,7 +6,7 @@ requires = [
"packaging>=24.2",
"setuptools>=77.0.3,<80.0.0",
"setuptools-scm>=8.0",
"torch == 2.4.1",
"torch == 2.5.1",
"wheel",
"jinja2",
]
......@@ -72,8 +72,6 @@ line-length = 80
"vllm/core/**/*.py" = ["UP006", "UP035"]
"vllm/engine/**/*.py" = ["UP006", "UP035"]
"vllm/executor/**/*.py" = ["UP006", "UP035"]
"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
"vllm/worker/**/*.py" = ["UP006", "UP035"]
# Python 3.8 typing - skip utils for ROCm
"vllm/utils/__init__.py" = ["UP006", "UP035"]
......@@ -174,3 +172,186 @@ respect-ignore-files = true
[tool.ty.environment]
python = "./.venv"
[tool.typos.files]
# these files may be written in non english words
extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*",
"benchmarks/sonnet.txt", "tests/lora/data/*", "build/*",
"vllm/third_party/*"]
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[tool.typos.default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
".*[Tt]h[rR].*"]
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.default.extend-identifiers]
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
# splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"
[tool.typos.default.extend-words]
iy = "iy"
tendencias = "tendencias"
# intel cpu features
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"
[tool.typos.type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
fo = "fo"
ba = "ba"
[tool.typos.type.py.extend-words]
[tool.typos.type.cpp]
extend-glob = ["*.cu"]
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cpp.extend-identifiers]
countr_one = "countr_one"
k_ot = "k_ot"
ot = "ot"
[tool.typos.type.cpp.extend-words]
[tool.typos.type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.rust.extend-identifiers]
flate2 = "flate2"
[tool.typos.type.rust.extend-words]
ser = "ser"
[tool.typos.type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.lock.extend-identifiers]
[tool.typos.type.lock.extend-words]
[tool.typos.type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.jl.extend-identifiers]
[tool.typos.type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[tool.typos.type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.go.extend-identifiers]
flate = "flate"
[tool.typos.type.go.extend-words]
[tool.typos.type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.css.extend-identifiers]
nd = "nd"
[tool.typos.type.css.extend-words]
[tool.typos.type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.man.extend-identifiers]
Nd = "Nd"
[tool.typos.type.man.extend-words]
[tool.typos.type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cert.extend-identifiers]
[tool.typos.type.cert.extend-words]
[tool.typos.type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.sh.extend-identifiers]
ot = "ot"
[tool.typos.type.sh.extend-words]
[tool.typos.type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.vimscript.extend-identifiers]
windo = "windo"
[tool.typos.type.vimscript.extend-words]
......@@ -4,7 +4,7 @@ ninja
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
torch==2.4.1
torch==2.5.1
wheel
jinja2>=3.1.6
regex
......@@ -7,13 +7,13 @@ requests >= 2.26.0
tqdm
blake3
py-cpuinfo
transformers >= 4.51.1
transformers >= 4.53.2
huggingface-hub[hf_xet] >= 0.33.0 # Required for Xet downloads.
tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf # Required by LlamaTokenizer.
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp
openai >= 1.52.0, <= 1.90.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support)
openai >= 1.87.0, <= 1.90.0 # Ensure modern openai package (ensure ResponsePrompt exists in type.responses and max_completion_tokens field support)
pydantic >= 2.10
prometheus_client >= 0.18.0
pillow # Required for image processing
......@@ -21,9 +21,11 @@ prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer >= 0.10.11, < 0.11
llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
outlines == 0.1.11
outlines_core == 0.2.10
# required for outlines backend disk cache
diskcache == 5.6.3
lark == 1.2.2
xgrammar == 0.1.19; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
xgrammar == 0.1.21; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
typing_extensions >= 4.10
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
partial-json-parser # used for parsing partial JSON outputs
......@@ -31,17 +33,18 @@ pyzmq >= 25.0.0
msgspec
gguf >= 0.13.0
importlib_metadata; python_version < '3.10'
mistral_common[opencv] >= 1.5.4 # requires numpy>=1.25 #1.6.2
mistral_common[image,audio] >= 1.5.4 # requires numpy>=1.25 #1.8.2
opencv-python-headless >= 4.11.0 # required for video IO
pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL.
compressed-tensors == 0.10.2 # required for compressed-tensors
depyf==0.18.0 # required for profiling and debugging with compilation config
depyf==0.19.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files
python-json-logger # Used by logging as per examples/others/logging_configuration.md
scipy # Required for phi-4-multimodal-instruct
ninja # Required for xgrammar, rocm, tpu, xpu
pybase64 # fast base64 implementation
cbor2 # Required for cross-language serialization of hashable objects
......@@ -24,6 +24,4 @@ datasets # for benchmark scripts
# Intel Extension for PyTorch, only for x86_64 CPUs
intel-openmp==2024.2.1; platform_machine == "x86_64"
intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
py-libnuma; platform_system != "Darwin"
psutil; platform_system != "Darwin"
triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
......@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.7.0
torchaudio==2.7.0
torch==2.7.1
torchaudio==2.7.1
# These must be updated alongside torch
torchvision==0.22.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.30
xformers==0.0.30; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
......@@ -4,6 +4,24 @@ mkdocs-material
mkdocstrings-python
mkdocs-gen-files
mkdocs-awesome-nav
mkdocs-glightbox
python-markdown-math
regex
ruff
# Required for argparse hook only
-f https://download.pytorch.org/whl/cpu
cachetools
cbor2
cloudpickle
fastapi
msgspec
openai
partial-json-parser
pillow
psutil
pybase64
pydantic
torch
transformers
zmq
# Common dependencies
-r common.txt
# Dependencies for HPU code
ray
triton==3.1.0
pandas
numpy==1.26.4
tabulate
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@f1f6624
# testing
pytest
tensorizer>=2.9.0
tensorizer==2.10.1
pytest-forked
pytest-asyncio
pytest-rerunfailures
......@@ -23,7 +23,7 @@ jiwer # required for audio tests
timm # required for internvl test
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.6.2 # required for pixtral test
mistral_common[image,audio] >= 1.8.2 # required for voxtral test
num2words # required for smolvlm test
opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test
......
......@@ -11,12 +11,17 @@ datasets
ray>=2.10.0,<2.45.0
peft
pytest-asyncio
tensorizer>=2.9.0
tensorizer==2.10.1
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0
conch-triton-kernels==1.2.1
numa
python-multipart
pytrie
setuptools_scm>=8
cmake==3.29
......@@ -25,7 +30,4 @@ triton == 3.0.0
flash_attn == 2.6.1
flash_mla == 1.0.0
lmslim == 0.3.0
numa
python-multipart
pytrie
# testing
pytest
tensorizer>=2.9.0
tensorizer==2.10.1
pytest-forked
pytest-asyncio
pytest-rerunfailures
......@@ -22,19 +22,20 @@ sentence-transformers # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
timm # required for internvl test
torch==2.7.0
torchaudio==2.7.0
torchvision==0.22.0
torch==2.7.1
torchaudio==2.7.1
torchvision==0.22.1
transformers_stream_generator # required for qwen-vl test
mamba_ssm # required for plamo2 test
matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.6.2 # required for pixtral test
mistral_common[image,audio] >= 1.8.2 # required for voxtral test
num2words # required for smolvlm test
open_clip_torch==2.32.0 # Required for nemotron_vl test
opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.8 # required for model evaluation test
mteb[bm25s]>=1.38.11, <2 # required for mteb test
transformers==4.52.4
transformers==4.53.2
tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test.
......@@ -53,3 +54,4 @@ runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0
fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10
terratorch==1.1rc2 # required for PrithviMAE test
\ No newline at end of file
......@@ -6,6 +6,10 @@ accelerate==1.0.1
# via
# lm-eval
# peft
aenum==3.1.16
# via lightly
affine==2.4.0
# via rasterio
aiohappyeyeballs==2.4.3
# via aiohttp
aiohttp==3.10.11
......@@ -21,8 +25,18 @@ aiosignal==1.3.1
# via
# aiohttp
# ray
albucore==0.0.16
# via terratorch
albumentations==1.4.6
# via terratorch
alembic==1.16.4
# via mlflow
annotated-types==0.7.0
# via pydantic
antlr4-python3-runtime==4.9.3
# via
# hydra-core
# omegaconf
anyio==4.6.2.post1
# via
# httpx
......@@ -34,10 +48,12 @@ arrow==1.3.0
attrs==24.2.0
# via
# aiohttp
# fiona
# hypothesis
# jsonlines
# jsonschema
# pytest-subtests
# rasterio
# referencing
audioread==3.0.1
# via librosa
......@@ -46,9 +62,13 @@ backoff==2.2.1
# -r requirements/test.in
# schemathesis
bitsandbytes==0.46.1
# via -r requirements/test.in
# via
# -r requirements/test.in
# lightning
black==24.10.0
# via datamodel-code-generator
blinker==1.9.0
# via flask
blobfile==3.0.0
# via -r requirements/test.in
bm25s==0.2.13
......@@ -64,11 +84,18 @@ bounded-pool-executor==0.0.3
buildkite-test-collector==0.1.9
# via -r requirements/test.in
cachetools==5.5.2
# via google-auth
# via
# google-auth
# mlflow-skinny
certifi==2024.8.30
# via
# fiona
# httpcore
# httpx
# lightly
# pyogrio
# pyproj
# rasterio
# requests
cffi==1.17.1
# via soundfile
......@@ -79,11 +106,28 @@ charset-normalizer==3.4.0
click==8.1.7
# via
# black
# click-plugins
# cligj
# fiona
# flask
# jiwer
# mlflow-skinny
# nltk
# rasterio
# ray
# schemathesis
# typer
# uvicorn
click-plugins==1.1.1.2
# via
# fiona
# rasterio
cligj==0.7.2
# via
# fiona
# rasterio
cloudpickle==3.1.1
# via mlflow-skinny
colorama==0.4.6
# via
# sacrebleu
......@@ -99,6 +143,8 @@ cupy-cuda12x==13.3.0
# via ray
cycler==0.12.1
# via matplotlib
databricks-sdk==0.59.0
# via mlflow-skinny
datamodel-code-generator==0.26.3
# via -r requirements/test.in
dataproperty==1.0.1
......@@ -122,13 +168,21 @@ distlib==0.3.9
# via virtualenv
dnspython==2.7.0
# via email-validator
docker==7.1.0
# via mlflow
docopt==0.6.2
# via num2words
einops==0.8.0
docstring-parser==0.17.0
# via jsonargparse
efficientnet-pytorch==0.7.1
# via segmentation-models-pytorch
einops==0.8.1
# via
# -r requirements/test.in
# encodec
# mamba-ssm
# terratorch
# torchgeo
# vector-quantize-pytorch
# vocos
einx==0.3.0
......@@ -141,6 +195,8 @@ eval-type-backport==0.2.2
# via mteb
evaluate==0.4.3
# via lm-eval
fastapi==0.116.1
# via mlflow-skinny
fastparquet==2024.11.0
# via genai-perf
fastrlock==0.8.2
......@@ -156,6 +212,10 @@ filelock==3.16.1
# torch
# transformers
# virtualenv
fiona==1.10.1
# via torchgeo
flask==3.1.1
# via mlflow
fonttools==4.54.1
# via matplotlib
fqdn==1.5.1
......@@ -173,23 +233,50 @@ fsspec==2024.9.0
# evaluate
# fastparquet
# huggingface-hub
# lightning
# pytorch-lightning
# torch
ftfy==6.3.1
# via open-clip-torch
genai-perf==0.0.8
# via -r requirements/test.in
genson==1.3.0
# via datamodel-code-generator
geopandas==1.0.1
# via terratorch
gitdb==4.0.12
# via gitpython
gitpython==3.1.44
# via mlflow-skinny
google-api-core==2.24.2
# via opencensus
google-auth==2.40.2
# via google-api-core
# via
# databricks-sdk
# google-api-core
googleapis-common-protos==1.70.0
# via google-api-core
graphene==3.4.3
# via mlflow
graphql-core==3.2.6
# via hypothesis-graphql
# via
# graphene
# graphql-relay
# hypothesis-graphql
graphql-relay==3.2.0
# via graphene
greenlet==3.2.3
# via sqlalchemy
grpcio==1.71.0
# via ray
gunicorn==23.0.0
# via mlflow
h11==0.14.0
# via httpcore
# via
# httpcore
# uvicorn
h5py==3.13.0
# via terratorch
harfile==0.3.0
# via schemathesis
hf-xet==1.1.3
......@@ -202,20 +289,27 @@ httpx==0.27.2
# via
# -r requirements/test.in
# schemathesis
huggingface-hub==0.33.0
huggingface-hub==0.33.1
# via
# -r requirements/test.in
# accelerate
# datasets
# evaluate
# open-clip-torch
# peft
# segmentation-models-pytorch
# sentence-transformers
# terratorch
# timm
# tokenizers
# transformers
# vocos
humanize==4.11.0
# via runai-model-streamer
hydra-core==1.3.2
# via
# lightly
# lightning
hypothesis==6.131.0
# via
# hypothesis-graphql
......@@ -233,6 +327,14 @@ idna==3.10
# jsonschema
# requests
# yarl
imageio==2.37.0
# via scikit-image
importlib-metadata==8.7.0
# via
# mlflow-skinny
# opentelemetry-api
importlib-resources==6.5.2
# via typeshed-client
inflect==5.6.2
# via datamodel-code-generator
iniconfig==2.0.0
......@@ -241,9 +343,13 @@ isoduration==20.11.0
# via jsonschema
isort==5.13.2
# via datamodel-code-generator
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
# via
# datamodel-code-generator
# flask
# mlflow
# torch
jiwer==3.0.5
# via -r requirements/test.in
......@@ -256,6 +362,10 @@ joblib==1.4.2
# librosa
# nltk
# scikit-learn
jsonargparse==4.35.0
# via
# lightning
# terratorch
jsonlines==4.0.0
# via lm-eval
jsonpointer==3.0.0
......@@ -274,12 +384,33 @@ kaleido==0.2.1
# via genai-perf
kiwisolver==1.4.7
# via matplotlib
kornia==0.8.1
# via torchgeo
kornia-rs==0.1.9
# via kornia
lazy-loader==0.4
# via librosa
# via
# librosa
# scikit-image
libnacl==2.1.0
# via tensorizer
librosa==0.10.2.post1
# via -r requirements/test.in
lightly==1.5.20
# via
# terratorch
# torchgeo
lightly-utils==0.0.2
# via lightly
lightning==2.5.1.post0
# via
# terratorch
# torchgeo
lightning-utilities==0.14.3
# via
# lightning
# pytorch-lightning
# torchmetrics
llvmlite==0.44.0
# via numba
lm-eval==0.4.8
......@@ -288,16 +419,27 @@ lxml==5.3.0
# via
# blobfile
# sacrebleu
mako==1.3.10
# via alembic
mamba-ssm==2.2.4
# via -r requirements/test.in
markdown==3.8.2
# via mlflow
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.1
# via
# flask
# jinja2
# mako
# werkzeug
matplotlib==3.9.2
# via -r requirements/test.in
# via
# -r requirements/test.in
# lightning
# mlflow
# pycocotools
# torchgeo
mbstrdecoder==1.1.3
# via
# dataproperty
......@@ -305,8 +447,12 @@ mbstrdecoder==1.1.3
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common==1.6.2
mistral-common==1.8.2
# via -r requirements/test.in
mlflow==2.22.0
# via terratorch
mlflow-skinny==2.22.0
# via mlflow
more-itertools==10.5.0
# via lm-eval
mpmath==1.3.0
......@@ -325,10 +471,14 @@ multiprocess==0.70.16
# via
# datasets
# evaluate
munch==4.0.0
# via pretrainedmodels
mypy-extensions==1.0.0
# via black
networkx==3.2.1
# via torch
# via
# scikit-image
# torch
ninja==1.11.1.3
# via mamba-ssm
nltk==3.9.1
......@@ -345,6 +495,8 @@ numpy==1.26.4
# via
# -r requirements/test.in
# accelerate
# albucore
# albumentations
# bitsandbytes
# bm25s
# contourpy
......@@ -355,9 +507,15 @@ numpy==1.26.4
# evaluate
# fastparquet
# genai-perf
# geopandas
# h5py
# imageio
# librosa
# lightly
# lightly-utils
# matplotlib
# mistral-common
# mlflow
# mteb
# numba
# numexpr
......@@ -365,18 +523,30 @@ numpy==1.26.4
# pandas
# patsy
# peft
# pycocotools
# pyogrio
# rasterio
# rioxarray
# rouge-score
# runai-model-streamer
# sacrebleu
# scikit-image
# scikit-learn
# scipy
# segmentation-models-pytorch
# shapely
# soxr
# statsmodels
# tensorboardx
# tensorizer
# tifffile
# torchgeo
# torchmetrics
# torchvision
# transformers
# tritonclient
# vocos
# xarray
nvidia-cublas-cu12==12.8.3.14
# via
# nvidia-cudnn-cu12
......@@ -414,6 +584,12 @@ nvidia-nvjitlink-cu12==12.8.61
# torch
nvidia-nvtx-cu12==12.8.55
# via torch
omegaconf==2.3.0
# via
# hydra-core
# lightning
open-clip-torch==2.32.0
# via -r requirements/test.in
opencensus==0.11.4
# via ray
opencensus-context==0.1.3
......@@ -421,7 +597,18 @@ opencensus-context==0.1.3
opencv-python-headless==4.11.0.86
# via
# -r requirements/test.in
# albucore
# albumentations
# mistral-common
opentelemetry-api==1.35.0
# via
# mlflow-skinny
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-sdk==1.35.0
# via mlflow-skinny
opentelemetry-semantic-conventions==0.56b0
# via opentelemetry-sdk
packaging==24.2
# via
# accelerate
......@@ -430,26 +617,44 @@ packaging==24.2
# datasets
# evaluate
# fastparquet
# geopandas
# gunicorn
# huggingface-hub
# hydra-core
# kornia
# lazy-loader
# lightning
# lightning-utilities
# mamba-ssm
# matplotlib
# mlflow-skinny
# peft
# plotly
# pooch
# pyogrio
# pytest
# pytest-rerunfailures
# pytorch-lightning
# ray
# rioxarray
# scikit-image
# statsmodels
# tensorboardx
# torchmetrics
# transformers
# typepy
# xarray
pandas==2.2.3
# via
# datasets
# evaluate
# fastparquet
# genai-perf
# geopandas
# mlflow
# statsmodels
# torchgeo
# xarray
pathspec==0.12.1
# via black
pathvalidate==3.2.1
......@@ -463,9 +668,14 @@ peft==0.13.2
pillow==10.4.0
# via
# genai-perf
# imageio
# lightly-utils
# matplotlib
# mistral-common
# scikit-image
# segmentation-models-pytorch
# sentence-transformers
# torchgeo
# torchvision
platformdirs==4.3.6
# via
......@@ -484,6 +694,8 @@ portalocker==2.10.1
# via sacrebleu
pqdm==0.2.0
# via -r requirements/test.in
pretrainedmodels==0.7.4
# via segmentation-models-pytorch
prometheus-client==0.22.0
# via ray
propcache==0.2.0
......@@ -494,8 +706,10 @@ protobuf==5.28.3
# via
# google-api-core
# googleapis-common-protos
# mlflow-skinny
# proto-plus
# ray
# tensorboardx
# tensorizer
psutil==6.1.0
# via
......@@ -510,6 +724,7 @@ pyarrow==18.0.0
# via
# datasets
# genai-perf
# mlflow
pyasn1==0.6.1
# via
# pyasn1-modules
......@@ -518,6 +733,10 @@ pyasn1-modules==0.4.2
# via google-auth
pybind11==2.13.6
# via lm-eval
pycocotools==2.0.8
# via terratorch
pycountry==24.6.1
# via pydantic-extra-types
pycparser==2.22
# via cffi
pycryptodomex==3.22.0
......@@ -525,23 +744,39 @@ pycryptodomex==3.22.0
pydantic==2.11.5
# via
# -r requirements/test.in
# albumentations
# datamodel-code-generator
# fastapi
# lightly
# mistral-common
# mlflow-skinny
# mteb
# pydantic-extra-types
# ray
pydantic-core==2.33.2
# via pydantic
pydantic-extra-types==2.10.5
# via mistral-common
pygments==2.18.0
# via rich
pyogrio==0.11.0
# via geopandas
pyparsing==3.2.0
# via matplotlib
# via
# matplotlib
# rasterio
pyproj==3.7.1
# via
# geopandas
# rioxarray
# torchgeo
pyrate-limiter==3.7.0
# via schemathesis
pystemmer==3.0.0
# via mteb
pytablewriter==1.2.0
# via lm-eval
pytest==8.3.3
pytest==8.3.5
# via
# -r requirements/test.in
# buildkite-test-collector
......@@ -554,6 +789,7 @@ pytest==8.3.3
# pytest-subtests
# pytest-timeout
# schemathesis
# terratorch
pytest-asyncio==0.24.0
# via -r requirements/test.in
pytest-forked==1.6.0
......@@ -568,15 +804,23 @@ pytest-subtests==0.14.1
# via schemathesis
pytest-timeout==2.3.1
# via -r requirements/test.in
python-box==7.3.2
# via terratorch
python-dateutil==2.9.0.post0
# via
# arrow
# botocore
# graphene
# lightly
# matplotlib
# pandas
# typepy
python-rapidjson==1.20
# via tritonclient
pytorch-lightning==2.5.2
# via
# lightly
# lightning
pytrec-eval-terrier==0.5.7
# via mteb
pytz==2024.2
......@@ -586,11 +830,17 @@ pytz==2024.2
pyyaml==6.0.2
# via
# accelerate
# albumentations
# datamodel-code-generator
# datasets
# genai-perf
# huggingface-hub
# jsonargparse
# lightning
# mlflow-skinny
# omegaconf
# peft
# pytorch-lightning
# ray
# responses
# schemathesis
......@@ -599,6 +849,11 @@ pyyaml==6.0.2
# vocos
rapidfuzz==3.12.1
# via jiwer
rasterio==1.4.3
# via
# rioxarray
# terratorch
# torchgeo
ray==2.43.0
# via -r requirements/test.in
redis==5.2.0
......@@ -610,18 +865,23 @@ referencing==0.35.1
regex==2024.9.11
# via
# nltk
# open-clip-torch
# sacrebleu
# tiktoken
# transformers
requests==2.32.3
# via
# buildkite-test-collector
# databricks-sdk
# datasets
# docker
# evaluate
# google-api-core
# huggingface-hub
# lightly
# lm-eval
# mistral-common
# mlflow-skinny
# mteb
# pooch
# ray
......@@ -639,8 +899,11 @@ rfc3987==1.3.8
rich==13.9.4
# via
# genai-perf
# lightning
# mteb
# typer
rioxarray==0.19.0
# via terratorch
rouge-score==0.1.2
# via lm-eval
rpds-py==0.20.1
......@@ -649,6 +912,8 @@ rpds-py==0.20.1
# referencing
rsa==4.9.1
# via google-auth
rtree==1.4.0
# via torchgeo
runai-model-streamer==0.11.0
# via -r requirements/test.in
runai-model-streamer-s3==0.11.0
......@@ -660,26 +925,38 @@ sacrebleu==2.4.3
safetensors==0.4.5
# via
# accelerate
# open-clip-torch
# peft
# timm
# transformers
schemathesis==3.39.15
# via -r requirements/test.in
scikit-image==0.25.2
# via albumentations
scikit-learn==1.5.2
# via
# albumentations
# librosa
# lm-eval
# mlflow
# mteb
# sentence-transformers
scipy==1.13.1
# via
# albumentations
# bm25s
# librosa
# mlflow
# mteb
# scikit-image
# scikit-learn
# sentence-transformers
# statsmodels
# vocos
segmentation-models-pytorch==0.4.0
# via
# terratorch
# torchgeo
sentence-transformers==3.2.1
# via
# -r requirements/test.in
......@@ -688,21 +965,30 @@ sentencepiece==0.2.0
# via mistral-common
setuptools==77.0.3
# via
# lightning-utilities
# mamba-ssm
# pytablewriter
# torch
# triton
shapely==2.1.1
# via
# geopandas
# torchgeo
shellingham==1.5.4
# via typer
six==1.16.0
# via
# junit-xml
# lightly
# opencensus
# python-dateutil
# rfc3339-validator
# rouge-score
# segmentation-models-pytorch
smart-open==7.1.0
# via ray
smmap==5.0.2
# via gitdb
sniffio==1.3.1
# via
# anyio
......@@ -713,12 +999,22 @@ soundfile==0.12.1
# via
# -r requirements/test.in
# librosa
# mistral-common
soxr==0.5.0.post1
# via librosa
# via
# librosa
# mistral-common
sqlalchemy==2.0.41
# via
# alembic
# mlflow
sqlitedict==2.1.0
# via lm-eval
sqlparse==0.5.3
# via mlflow-skinny
starlette==0.46.2
# via
# fastapi
# schemathesis
# starlette-testclient
starlette-testclient==0.4.1
......@@ -739,16 +1035,29 @@ tenacity==9.0.0
# via
# lm-eval
# plotly
tensorizer==2.9.0
tensorboardx==2.6.4
# via lightning
tensorizer==2.10.1
# via -r requirements/test.in
terratorch==1.1rc2
# via -r requirements/test.in
threadpoolctl==3.5.0
# via scikit-learn
tifffile==2025.3.30
# via
# scikit-image
# terratorch
tiktoken==0.7.0
# via
# lm-eval
# mistral-common
timm==1.0.11
# via -r requirements/test.in
timm==1.0.15
# via
# -r requirements/test.in
# open-clip-torch
# segmentation-models-pytorch
# terratorch
# torchgeo
tokenizers==0.21.1
# via
# -r requirements/test.in
......@@ -757,50 +1066,81 @@ tomli==2.2.1
# via schemathesis
tomli-w==1.2.0
# via schemathesis
torch==2.7.0+cu128
torch==2.7.1+cu128
# via
# -r requirements/test.in
# accelerate
# bitsandbytes
# efficientnet-pytorch
# encodec
# fastsafetensors
# kornia
# lightly
# lightning
# lm-eval
# mamba-ssm
# mteb
# open-clip-torch
# peft
# pretrainedmodels
# pytorch-lightning
# runai-model-streamer
# segmentation-models-pytorch
# sentence-transformers
# tensorizer
# terratorch
# timm
# torchaudio
# torchgeo
# torchmetrics
# torchvision
# vector-quantize-pytorch
# vocos
torchaudio==2.7.0+cu128
torchaudio==2.7.1+cu128
# via
# -r requirements/test.in
# encodec
# vocos
torchvision==0.22.0+cu128
torchgeo==0.7.0
# via terratorch
torchmetrics==1.7.4
# via
# lightning
# pytorch-lightning
# terratorch
# torchgeo
torchvision==0.22.1+cu128
# via
# -r requirements/test.in
# lightly
# open-clip-torch
# pretrainedmodels
# segmentation-models-pytorch
# terratorch
# timm
# torchgeo
tqdm==4.66.6
# via
# datasets
# evaluate
# huggingface-hub
# lightly
# lightning
# lm-eval
# mteb
# nltk
# open-clip-torch
# peft
# pqdm
# pretrainedmodels
# pytorch-lightning
# segmentation-models-pytorch
# sentence-transformers
# tqdm-multiprocess
# transformers
tqdm-multiprocess==0.0.11
# via lm-eval
transformers==4.52.4
transformers==4.53.2
# via
# -r requirements/test.in
# genai-perf
......@@ -811,7 +1151,7 @@ transformers==4.52.4
# transformers-stream-generator
transformers-stream-generator==0.0.5
# via -r requirements/test.in
triton==3.3.0
triton==3.3.1
# via torch
tritonclient==2.51.0
# via
......@@ -826,17 +1166,34 @@ typer==0.15.2
# via fastsafetensors
types-python-dateutil==2.9.0.20241206
# via arrow
typeshed-client==2.8.2
# via jsonargparse
typing-extensions==4.12.2
# via
# albumentations
# alembic
# fastapi
# graphene
# huggingface-hub
# librosa
# lightning
# lightning-utilities
# mistral-common
# mlflow-skinny
# mteb
# opentelemetry-api
# opentelemetry-sdk
# opentelemetry-semantic-conventions
# pqdm
# pydantic
# pydantic-core
# pydantic-extra-types
# pytorch-lightning
# sqlalchemy
# torch
# torchgeo
# typer
# typeshed-client
# typing-inspection
typing-inspection==0.4.1
# via pydantic
......@@ -848,23 +1205,33 @@ urllib3==2.2.3
# via
# blobfile
# botocore
# docker
# lightly
# requests
# responses
# tritonclient
uvicorn==0.35.0
# via mlflow-skinny
vector-quantize-pytorch==1.21.2
# via -r requirements/test.in
virtualenv==20.31.2
# via ray
vocos==0.1.0
# via -r requirements/test.in
wcwidth==0.2.13
# via ftfy
webcolors==24.11.1
# via jsonschema
werkzeug==3.1.3
# via schemathesis
# via
# flask
# schemathesis
word2number==1.1
# via lm-eval
wrapt==1.17.2
# via smart-open
xarray==2025.7.1
# via rioxarray
xxhash==3.5.0
# via
# datasets
......@@ -873,5 +1240,7 @@ yarl==1.17.1
# via
# aiohttp
# schemathesis
zipp==3.23.0
# via importlib-metadata
zstandard==0.23.0
# via lm-eval
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment