Commit cc7f22a8 authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.9.1' into v0.9.1-ori

parents b9ea0c09 b6553be1
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
vLLM Chat Assistant - A Streamlit Web Interface
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from openai import APIConnectionError, OpenAI
from openai.pagination import SyncPage
from openai.types.model import Model
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
This file demonstrates the example usage of cpu offloading
with LMCache in vLLM v1 or v0.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
This file demonstrates the example usage of disaggregated prefilling
with LMCache.
......
......@@ -33,7 +33,7 @@ check_num_gpus() {
ensure_python_library_installed() {
echo "Checking if $1 is installed..."
python -c "import $1" > /dev/null 2>&1
python3 -c "import $1" > /dev/null 2>&1
if [ $? -ne 0 ]; then
if [ "$1" == "nixl" ]; then
echo "$1 is not installed. Please refer to https://github.com/ai-dynamo/nixl for installation."
......@@ -121,8 +121,8 @@ main() {
echo "All servers are up. Starting benchmark..."
# begin benchmark
cd ../../../benchmarks/
python benchmark_serving.py --port 9000 --seed $(date +%s) \
cd ../../../../benchmarks/
python3 benchmark_serving.py --port 9000 --seed $(date +%s) \
--model meta-llama/Llama-3.1-8B-Instruct \
--dataset-name random --random-input-len 7500 --random-output-len 200 \
--num-prompts 200 --burstiness 100 --request-rate 3.6 | tee benchmark.log
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import argparse
import os
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
This file demonstrates the example usage of remote KV cache sharing
with LMCache.
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import argparse
import dataclasses
......
{#-
  DeepSeek-R1 chat template with tool-calling support.

  Inputs: `messages`, `bos_token`, optional `tools`, and optional
  `add_generation_prompt` (treated as false when undefined).

  Namespace flags used while rendering:
    is_first        - set once the first tool call of an assistant turn is written
    is_tool         - true while a tool-outputs block is still open
    is_output_first - true until the first tool output of a block is written
    is_first_sp     - true until the first system message has been collected
    is_last_user    - true when the most recent message was a user turn
                      (a user turn already ends with the assistant tag)

  NOTE(review): the whitespace-control markers are significant. This presumably
  renders with trim_blocks/lstrip_blocks enabled, as Hugging Face chat templates
  do - confirm before reformatting or adding lines.
-#}
{% if not add_generation_prompt is defined %}
{% set add_generation_prompt = false %}
{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}
{#- Merge every system message into a single prompt, separated by blank lines. -#}
{%- for message in messages %}
{%- if message['role'] == 'system' %}
{%- if ns.is_first_sp %}
{% set ns.system_prompt = ns.system_prompt + message['content'] %}
{% set ns.is_first_sp = false %}
{%- else %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Adapted from https://github.com/sgl-project/sglang/blob/main/examples/chat_template/tool_chat_template_deepseekr1.jinja #}
{% if tools is defined and tools is not none %}
{% set tool_ns = namespace(text='You are a helpful assistant with tool calling capabilities. '
'When a tool call is needed, you MUST use the following format to issue the call:\n'
'<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>FUNCTION_NAME\n'
'```json\n{"param1": "value1", "param2": "value2"}\n```<|tool▁call▁end|><|tool▁calls▁end|>\n\n'
'Make sure the JSON is valid.\n\n'
'## Tools\n\n### Function\n\nYou have the following functions available:\n\n') %}
{% for tool in tools %}
{% set tool_ns.text = tool_ns.text + '\n```json\n' + (tool | tojson) + '\n```\n' %}
{% endfor %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %}
{{ bos_token }}
{{ ns.system_prompt }}
{%- for message in messages %}
{% set content = message['content'] %}
{%- if message['role'] == 'user' %}
{%- set ns.is_tool = false -%}
{%- set ns.is_first = false -%}
{%- set ns.is_last_user = true -%}
{{'<|User|>' + content + '<|Assistant|>'}}
{%- endif %}
{#- Assistant history keeps only the text after the last '</think>'. `content` may be none on a pure tool-call turn, so guard the substring test. -#}
{%- if message['role'] == 'assistant' %}
{% if content is not none and '</think>' in content %}
{% set content = content.split('</think>')[-1] %}
{% endif %}
{% endif %}
{#- Assistant turn that issues tool calls: close any open tool-outputs block, then write each call. -#}
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{'<|tool▁outputs▁end|>'}}
{%- endif %}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
{%- set ns.is_output_first = true %}
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if content is none %}
{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- else %}
{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments']|tojson + '\n' + '```' + '<|tool▁call▁end|>'}}
{%- endif %}
{%- endfor %}
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
{%- endif %}
{#- Plain assistant turn (no tool calls); it also closes a tool-outputs block left open by preceding tool messages. -#}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{{content + '<|end▁of▁sentence|>'}}
{%- endif %}
{%- endif %}
{#- Tool result: the first one opens the tool-outputs block, later ones are appended on a new line. -#}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{%- if ns.is_output_first %}
{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{%- set ns.is_output_first = false %}
{%- else %}
{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}
{%- endif %}
{%- endif %}
{%- endfor -%}
{% if ns.is_tool %}
{{'<|tool▁outputs▁end|>'}}
{% endif %}
{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}
{{'<|Assistant|>'}}
{% endif %}
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib
import traceback
......
[build-system]
# Should be mirrored in requirements/build.txt
requires = [
"cmake>=3.26",
"cmake>=3.26.1",
"ninja",
"packaging>=24.2",
"setuptools>=77.0.3,<80.0.0",
"setuptools-scm>=8.0",
"torch == 2.7.0",
"wheel",
"regex",
"jinja2",
]
build-backend = "setuptools.build_meta"
......@@ -110,6 +109,7 @@ ignore = [
]
[tool.mypy]
plugins = ['pydantic.mypy']
ignore_missing_imports = true
check_untyped_defs = true
follow_imports = "silent"
......@@ -171,7 +171,8 @@ plugins.md033.enabled = false # inline-html
plugins.md046.enabled = false # code-block-style
plugins.md024.allow_different_nesting = true # no-duplicate-headers
[tool.ty]
[tool.ty.src]
root = "./vllm"
respect-ignore-files = true
[tool.ty.environment]
......
# Should be mirrored in pyproject.toml
cmake>=3.26
cmake>=3.26.1
ninja
packaging>=24.2
setuptools>=77.0.3,<80.0.0
......
......@@ -14,7 +14,7 @@ protobuf # Required by LlamaTokenizer.
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp
openai >= 1.52.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support)
pydantic >= 2.9
pydantic >= 2.10
prometheus_client >= 0.18.0
pillow # Required for image processing
prometheus-fastapi-instrumentator >= 7.0.0
......@@ -37,7 +37,7 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL.
compressed-tensors == 0.9.4 # required for compressed-tensors
compressed-tensors == 0.10.1 # required for compressed-tensors
depyf==0.18.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files
......
# Common dependencies
-r common.txt
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
# Dependencies for CPUs
packaging>=24.2
setuptools>=77.0.3,<80.0.0
......@@ -24,3 +27,5 @@ triton==3.2.0; platform_machine == "x86_64"
# Intel Extension for PyTorch, only for x86_64 CPUs
intel-openmp==2024.2.1; platform_machine == "x86_64"
intel_extension_for_pytorch==2.7.0; platform_machine == "x86_64"
py-libnuma; platform_system != "Darwin"
psutil; platform_system != "Darwin"
......@@ -9,7 +9,9 @@ pytest-shard
pytest-timeout
librosa # required by audio tests in entrypoints/openai
sentence-transformers
sentence-transformers # required for embedding tests
transformers==4.51.3
transformers_stream_generator # required for qwen-vl test
numba == 0.61.2; python_version > '3.9'
# testing utils
boto3
......@@ -39,3 +41,6 @@ matplotlib # required for qwen-vl test
num2words # required for smolvlm test
pqdm
timm # required for internvl test
schemathesis>=3.39.15 # Required for openai schema test.
mteb>=1.38.11, <2 # required for mteb test
......@@ -7,7 +7,7 @@ torchvision==0.22.0
torchaudio==2.7.0
triton==3.2
cmake>=3.26,<4
cmake>=3.26.1,<4
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
......
......@@ -12,5 +12,8 @@ ray>=2.10.0,<2.45.0
peft
pytest-asyncio
tensorizer>=2.9.0
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0
......@@ -17,7 +17,7 @@ vector_quantize_pytorch # required for minicpmo_26 test
vocos # required for minicpmo_26 test
peft
pqdm
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required by pipeline parallelism tests
ray[cgraph,default]>=2.43.0, !=2.44.* # Ray Compiled Graph, required by pipeline parallelism tests
sentence-transformers # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
......@@ -34,7 +34,7 @@ opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.8 # required for model evaluation test
mteb>=1.38.11, <2 # required for mteb test
transformers==4.51.3
transformers==4.52.4
tokenizers==0.21.1
huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
schemathesis>=3.39.15 # Required for openai schema test.
......@@ -51,3 +51,4 @@ numpy
runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0
fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10
\ No newline at end of file
......@@ -10,9 +10,13 @@ aiohappyeyeballs==2.4.3
# via aiohttp
aiohttp==3.10.11
# via
# aiohttp-cors
# datasets
# fsspec
# lm-eval
# ray
aiohttp-cors==0.8.1
# via ray
aiosignal==1.3.1
# via
# aiohttp
......@@ -57,6 +61,8 @@ bounded-pool-executor==0.0.3
# via pqdm
buildkite-test-collector==0.1.9
# via -r requirements/test.in
cachetools==5.5.2
# via google-auth
certifi==2024.8.30
# via
# httpcore
......@@ -81,6 +87,8 @@ colorama==0.4.6
# sacrebleu
# schemathesis
# tqdm-multiprocess
colorful==0.5.6
# via ray
contourpy==1.3.0
# via matplotlib
cramjam==2.9.0
......@@ -108,6 +116,8 @@ dill==0.3.8
# evaluate
# lm-eval
# multiprocess
distlib==0.3.9
# via virtualenv
dnspython==2.7.0
# via email-validator
docopt==0.6.2
......@@ -143,6 +153,7 @@ filelock==3.16.1
# ray
# torch
# transformers
# virtualenv
fonttools==4.54.1
# via matplotlib
fqdn==1.5.1
......@@ -165,8 +176,16 @@ genai-perf==0.0.8
# via -r requirements/test.in
genson==1.3.0
# via datamodel-code-generator
google-api-core==2.24.2
# via opencensus
google-auth==2.40.2
# via google-api-core
googleapis-common-protos==1.70.0
# via google-api-core
graphql-core==3.2.6
# via hypothesis-graphql
grpcio==1.71.0
# via ray
h11==0.14.0
# via httpcore
harfile==0.3.0
......@@ -392,6 +411,10 @@ nvidia-nvjitlink-cu12==12.8.61
# torch
nvidia-nvtx-cu12==12.8.55
# via torch
opencensus==0.11.4
# via ray
opencensus-context==0.1.3
# via opencensus
opencv-python-headless==4.11.0.86
# via
# -r requirements/test.in
......@@ -445,6 +468,7 @@ platformdirs==4.3.6
# via
# black
# pooch
# virtualenv
plotly==5.24.1
# via genai-perf
pluggy==1.5.0
......@@ -457,10 +481,17 @@ portalocker==2.10.1
# via sacrebleu
pqdm==0.2.0
# via -r requirements/test.in
prometheus-client==0.22.0
# via ray
propcache==0.2.0
# via yarl
proto-plus==1.26.1
# via google-api-core
protobuf==5.28.3
# via
# google-api-core
# googleapis-common-protos
# proto-plus
# ray
# tensorizer
psutil==6.1.0
......@@ -470,22 +501,32 @@ psutil==6.1.0
# tensorizer
py==1.11.0
# via pytest-forked
py-spy==0.4.0
# via ray
pyarrow==18.0.0
# via
# datasets
# genai-perf
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.2
# via google-auth
pybind11==2.13.6
# via lm-eval
pycparser==2.22
# via cffi
pycryptodomex==3.22.0
# via blobfile
pydantic==2.9.2
pydantic==2.11.5
# via
# -r requirements/test.in
# datamodel-code-generator
# mistral-common
# mteb
pydantic-core==2.23.4
# ray
pydantic-core==2.33.2
# via pydantic
pygments==2.18.0
# via rich
......@@ -572,6 +613,7 @@ requests==2.32.3
# buildkite-test-collector
# datasets
# evaluate
# google-api-core
# huggingface-hub
# lm-eval
# mistral-common
......@@ -600,6 +642,8 @@ rpds-py==0.20.1
# via
# jsonschema
# referencing
rsa==4.9.1
# via google-auth
runai-model-streamer==0.11.0
# via -r requirements/test.in
runai-model-streamer-s3==0.11.0
......@@ -647,9 +691,12 @@ shellingham==1.5.4
six==1.16.0
# via
# junit-xml
# opencensus
# python-dateutil
# rfc3339-validator
# rouge-score
smart-open==7.1.0
# via ray
sniffio==1.3.1
# via
# anyio
......@@ -747,7 +794,7 @@ tqdm==4.66.6
# transformers
tqdm-multiprocess==0.0.11
# via lm-eval
transformers==4.51.3
transformers==4.52.4
# via
# -r requirements/test.in
# genai-perf
......@@ -784,6 +831,9 @@ typing-extensions==4.12.2
# pydantic-core
# torch
# typer
# typing-inspection
typing-inspection==0.4.1
# via pydantic
tzdata==2024.2
# via pandas
uri-template==1.3.0
......@@ -797,6 +847,8 @@ urllib3==2.2.3
# tritonclient
vector-quantize-pytorch==1.21.2
# via -r requirements/test.in
virtualenv==20.31.2
# via ray
vocos==0.1.0
# via -r requirements/test.in
webcolors==24.11.1
......@@ -805,6 +857,8 @@ werkzeug==3.1.3
# via schemathesis
word2number==1.1
# via lm-eval
wrapt==1.17.2
# via smart-open
xxhash==3.5.0
# via
# datasets
......
......@@ -2,7 +2,7 @@
-r common.txt
# Dependencies for TPU
cmake>=3.26
cmake>=3.26.1
packaging>=24.2
setuptools-scm>=8
wheel
......@@ -18,9 +18,9 @@ setuptools==78.1.0
--find-links https://storage.googleapis.com/libtpu-releases/index.html
--find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html
--find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
torch==2.8.0.dev20250518
torchvision==0.22.0.dev20250518
torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250518-cp39-cp39-linux_x86_64.whl ; python_version == "3.9"
torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250518-cp310-cp310-linux_x86_64.whl ; python_version == "3.10"
torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250518-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
torch==2.8.0.dev20250605
torchvision==0.23.0.dev20250605
torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250605-cp39-cp39-linux_x86_64.whl ; python_version == "3.9"
torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250605-cp310-cp310-linux_x86_64.whl ; python_version == "3.10"
torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250605-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment