Commit 3fb4b5fa authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.18.0' into v0.18.0-ori

parents bcf25339 89138b21
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa: E501
"""
Example online usage of Pooling API for ColQwen3 multi-vector retrieval.
ColQwen3 is a multi-modal late interaction model based on Qwen3-VL that
produces per-token embeddings (320-dim, L2-normalized) for both text and
image inputs. Similarity is computed via MaxSim scoring.
This example mirrors the official TomoroAI inference code
(https://huggingface.co/TomoroAI/tomoro-colqwen3-embed-4b) but uses the
vLLM serving API instead of local HuggingFace model loading.
Start the server with:
vllm serve TomoroAI/tomoro-colqwen3-embed-4b --max-model-len 4096
Then run this script:
python colqwen3_token_embed_online.py
"""
import argparse
import base64
from io import BytesIO
import numpy as np
import requests
from PIL import Image
# ── Helpers ─────────────────────────────────────────────────
def post_http_request(payload: dict, api_url: str) -> requests.Response:
    """POST ``payload`` as a JSON body to ``api_url`` and return the raw response.

    The response is returned without any status check; callers decide how
    to treat non-success replies.
    """
    return requests.post(
        api_url,
        headers={"User-Agent": "Test Client"},
        json=payload,
    )
def load_image(url: str) -> Image.Image:
    """Fetch the image at ``url`` and return it converted to RGB.

    The download is tried first with no extra headers and then, if the
    server answered 403, once more with a browser-like User-Agent.

    Raises:
        requests.HTTPError: an attempt returned an error status other than 403.
        RuntimeError: every attempt was answered with 403.
    """
    browser_like = {"User-Agent": "Mozilla/5.0 (compatible; ColQwen3-demo/1.0)"}
    for attempt_headers in ({}, browser_like):
        reply = requests.get(url, headers=attempt_headers, timeout=10)
        if reply.status_code != 403:
            # Any non-403 answer is final: either it is an error or it is the image.
            reply.raise_for_status()
            raw = BytesIO(reply.content)
            return Image.open(raw).convert("RGB")
    raise RuntimeError(f"Could not fetch image from {url}")
def encode_image_base64(image: Image.Image) -> str:
    """Serialize ``image`` as PNG and return it as a ``data:image/png;base64,`` URI."""
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode()
    return "data:image/png;base64," + encoded
def compute_maxsim(q_emb: np.ndarray, d_emb: np.ndarray) -> float:
    """Return the ColBERT-style MaxSim score of a query against a document.

    Every row of ``q_emb`` is matched with its highest dot product over the
    rows of ``d_emb``; the score is the sum of those per-row maxima.
    Assumes both inputs are 2-D (tokens x dim) arrays with matching last axis.
    """
    pairwise = np.matmul(q_emb, d_emb.T)
    best_per_query_token = pairwise.max(axis=-1)
    return float(best_per_query_token.sum())
# ── Encode functions ────────────────────────────────────────
def encode_queries(texts: list[str], model: str, api_url: str) -> list[np.ndarray]:
    """Encode text queries → list of multi-vector embeddings.

    All texts are sent in a single request via the ``input`` field.

    Args:
        texts: Query strings to embed.
        model: Model name placed in the request body.
        api_url: URL the request is posted to.

    Returns:
        One array per entry of the response's ``data`` list, built from that
        entry's ``data`` field.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
    """
    resp = post_http_request({"model": model, "input": texts}, api_url)
    # Fail with the HTTP status instead of an opaque KeyError / JSON error
    # when the server rejects the request.
    resp.raise_for_status()
    return [np.array(item["data"]) for item in resp.json()["data"]]
def encode_images(image_urls: list[str], model: str, api_url: str) -> list[np.ndarray]:
    """Encode image documents → list of multi-vector embeddings.

    Images are sent via the chat-style `messages` field so that the
    vLLM multimodal processor handles them correctly. Each image is
    downloaded, re-encoded as a base64 PNG data URI and posted in its own
    request.

    Args:
        image_urls: URLs of the images to embed.
        model: Model name placed in the request body.
        api_url: URL the requests are posted to.

    Returns:
        The embeddings of the images that were encoded successfully, in
        input order. An image whose request fails is reported on stdout and
        skipped, so the result can be shorter than ``image_urls`` and its
        positions then no longer line up with the input list.
    """
    embeddings = []
    for url in image_urls:
        print(f" Loading: {url.split('/')[-1]}...")
        image = load_image(url)
        image_uri = encode_image_base64(image)
        resp = post_http_request(
            {
                "model": model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "image_url", "image_url": {"url": image_uri}},
                            {"type": "text", "text": "Describe the image."},
                        ],
                    }
                ],
            },
            api_url,
        )
        try:
            result = resp.json()
        except ValueError:
            # Error responses are not guaranteed to carry a JSON body; report
            # the raw text and skip this image like any other failure.
            print(f" Error ({resp.status_code}): {resp.text[:200]}")
            continue
        if resp.status_code != 200 or "data" not in result:
            print(f" Error ({resp.status_code}): {str(result)[:200]}")
            continue
        embeddings.append(np.array(result["data"][0]["data"]))
    return embeddings
# ── Main ────────────────────────────────────────────────────
def parse_args():
    """Parse the command line and return the resulting namespace.

    Recognized options are ``--host``, ``--port`` and ``--model``, each with
    a default value.
    """
    cli = argparse.ArgumentParser()
    option_table = (
        ("--host", str, "localhost"),
        ("--port", int, 8000),
        ("--model", str, "TomoroAI/tomoro-colqwen3-embed-4b"),
    )
    for flag, value_type, fallback in option_table:
        cli.add_argument(flag, type=value_type, default=fallback)
    return cli.parse_args()
def main(args):
    """Run the demo against a running vLLM server and print the results.

    Steps: (1) embed the text queries, (2) embed the image documents,
    (3) print the query × document MaxSim matrix when at least one image
    was embedded, (4) score text documents through the /score endpoint.

    Args:
        args: Namespace providing ``host``, ``port`` and ``model``.

    Raises:
        requests.HTTPError: The /score request was answered with a 4xx/5xx
            status.
    """
    pooling_url = f"http://{args.host}:{args.port}/pooling"
    score_url = f"http://{args.host}:{args.port}/score"
    model = args.model
    # Same sample data as the official TomoroAI example
    queries = [
        "Retrieve the city of Singapore",
        "Retrieve the city of Beijing",
        "Retrieve the city of London",
    ]
    image_urls = [
        "https://upload.wikimedia.org/wikipedia/commons/2/27/Singapore_skyline_2022.jpg",
        "https://upload.wikimedia.org/wikipedia/commons/6/61/Beijing_skyline_at_night.JPG",
        "https://upload.wikimedia.org/wikipedia/commons/4/49/London_skyline.jpg",
    ]
    # ── 1) Text query embeddings ────────────────────────────
    print("=" * 60)
    print("1. Encode text queries (multi-vector)")
    print("=" * 60)
    query_embeddings = encode_queries(queries, model, pooling_url)
    for i, emb in enumerate(query_embeddings):
        # Norm of the first token vector only, shown as a quick sanity check.
        norm = float(np.linalg.norm(emb[0]))
        print(f' Query {i}: {emb.shape} (L2 norm: {norm:.4f}) "{queries[i]}"')
    # ── 2) Image document embeddings ────────────────────────
    print()
    print("=" * 60)
    print("2. Encode image documents (multi-vector)")
    print("=" * 60)
    doc_embeddings = encode_images(image_urls, model, pooling_url)
    # NOTE: the file-name label is taken by position, so it is only accurate
    # when encode_images returned one embedding for every URL.
    for i, emb in enumerate(doc_embeddings):
        print(f" Doc {i}: {emb.shape} {image_urls[i].split('/')[-1]}")
    # ── 3) Cross-modal MaxSim scoring ───────────────────────
    if doc_embeddings:
        print()
        print("=" * 60)
        print("3. Cross-modal MaxSim scores (text queries × image docs)")
        print("=" * 60)
        # Header
        print(f"{'':>35s}", end="")
        for j in range(len(doc_embeddings)):
            print(f" Doc {j:>2d}", end="")
        print()
        # Score matrix
        for i, q_emb in enumerate(query_embeddings):
            print(f" {queries[i]:<33s}", end="")
            for j, d_emb in enumerate(doc_embeddings):
                score = compute_maxsim(q_emb, d_emb)
                print(f" {score:6.2f}", end="")
            print()
    # ── 4) Text-only /score endpoint ────────────────────────
    print()
    print("=" * 60)
    print("4. Text-only late interaction scoring (/score endpoint)")
    print("=" * 60)
    text_query = "What is the capital of France?"
    text_docs = [
        "The capital of France is Paris.",
        "Berlin is the capital of Germany.",
        "Python is a programming language.",
    ]
    resp = post_http_request(
        {"model": model, "text_1": text_query, "text_2": text_docs},
        score_url,
    )
    # Report a rejected request by its HTTP status rather than by a KeyError
    # on the missing "data" field below.
    resp.raise_for_status()
    print(f' Query: "{text_query}"\n')
    for item in resp.json()["data"]:
        idx = item["index"]
        print(f" Doc {idx} (score={item['score']:.4f}): {text_docs[idx]}")
# Script entry point: parse the command line, then run the demo.
if __name__ == "__main__":
    args = parse_args()
    main(args)
...@@ -42,6 +42,7 @@ theme: ...@@ -42,6 +42,7 @@ theme:
- navigation.sections - navigation.sections
- navigation.indexes - navigation.indexes
- navigation.top - navigation.top
- navigation.path
- search.highlight - search.highlight
- search.share - search.share
- toc.follow - toc.follow
...@@ -63,8 +64,9 @@ plugins: ...@@ -63,8 +64,9 @@ plugins:
- git-revision-date-localized: - git-revision-date-localized:
# exclude autogenerated files # exclude autogenerated files
exclude: exclude:
- argparse/* - api/*
- examples/* - examples/*
- generated/*
- minify: - minify:
minify_html: true minify_html: true
minify_js: true minify_js: true
...@@ -92,7 +94,6 @@ plugins: ...@@ -92,7 +94,6 @@ plugins:
- "!.*_pb2_grpc" # Exclude auto-generated gRPC stubs - "!.*_pb2_grpc" # Exclude auto-generated gRPC stubs
summary: summary:
modules: true modules: true
show_if_no_docstring: true
show_signature_annotations: true show_signature_annotations: true
separate_signature: true separate_signature: true
show_overloads: true show_overloads: true
...@@ -105,6 +106,10 @@ plugins: ...@@ -105,6 +106,10 @@ plugins:
- https://numpy.org/doc/stable/objects.inv - https://numpy.org/doc/stable/objects.inv
- https://pytorch.org/docs/stable/objects.inv - https://pytorch.org/docs/stable/objects.inv
- https://psutil.readthedocs.io/en/stable/objects.inv - https://psutil.readthedocs.io/en/stable/objects.inv
- redirects:
redirect_maps:
features/spec_decode/README.md: features/speculative_decoding/README.md
features/spec_decode/speculators.md: features/speculative_decoding/speculators.md
markdown_extensions: markdown_extensions:
- attr_list - attr_list
...@@ -141,7 +146,6 @@ extra_css: ...@@ -141,7 +146,6 @@ extra_css:
- mkdocs/stylesheets/extra.css - mkdocs/stylesheets/extra.css
extra_javascript: extra_javascript:
- mkdocs/javascript/reo.js
- mkdocs/javascript/run_llm_widget.js - mkdocs/javascript/run_llm_widget.js
- mkdocs/javascript/mathjax.js - mkdocs/javascript/mathjax.js
- https://unpkg.com/mathjax@3.2.2/es5/tex-mml-chtml.js - https://unpkg.com/mathjax@3.2.2/es5/tex-mml-chtml.js
......
...@@ -9,7 +9,6 @@ requires = [ ...@@ -9,7 +9,6 @@ requires = [
"torch == 2.10.0", "torch == 2.10.0",
"wheel", "wheel",
"jinja2", "jinja2",
"grpcio-tools==1.78.0",
] ]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
...@@ -56,10 +55,6 @@ include = ["vllm*"] ...@@ -56,10 +55,6 @@ include = ["vllm*"]
"vllm/third_party/**" = ["ALL"] "vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"] "vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"] "vllm/_version.py" = ["ALL"]
# Exclude generated protobuf files
"vllm/grpc/*_pb2.py" = ["ALL"]
"vllm/grpc/*_pb2_grpc.py" = ["ALL"]
"vllm/grpc/*_pb2.pyi" = ["ALL"]
[tool.ruff.lint] [tool.ruff.lint]
select = [ select = [
...@@ -112,12 +107,10 @@ markers = [ ...@@ -112,12 +107,10 @@ markers = [
"cpu_test: mark test as CPU-only test", "cpu_test: mark test as CPU-only test",
"split: run this test as part of a split", "split: run this test as part of a split",
"distributed: run this test only in distributed GPU tests", "distributed: run this test only in distributed GPU tests",
"skip_v1: do not run this test with v1",
"optional: optional tests that are automatically skipped, include --optional to run them", "optional: optional tests that are automatically skipped, include --optional to run them",
] ]
[tool.ty.src] [tool.ty.src]
root = "./vllm"
respect-ignore-files = true respect-ignore-files = true
[tool.ty.environment] [tool.ty.environment]
...@@ -125,190 +118,56 @@ python = "./.venv" ...@@ -125,190 +118,56 @@ python = "./.venv"
[tool.typos.files] [tool.typos.files]
# these files may be written in non english words # these files may be written in non english words
extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*", extend-exclude = ["tests/models/fixtures/*", "tests/prompts/*", "tests/tokenizers_/*",
"benchmarks/sonnet.txt", "tests/lora/data/*", "build/*", "benchmarks/sonnet.txt", "tests/lora/data/*", "examples/pooling/token_embed/*", "build/*",
"vllm/third_party/*", "vllm/entrypoints/serve/instrumentator/static/*", "vllm/third_party/*", "vllm/entrypoints/serve/instrumentator/static/*", "tests/entrypoints/openai/test_transcription_validation.py",
"docs/governance/process.md"] "docs/governance/process.md", "tests/v1/engine/test_fast_incdec_prefix_err.py", ".git/*"]
ignore-hidden = true ignore-hidden = false
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[tool.typos.default] [tool.typos.default]
binary = false extend-ignore-identifiers-re = [".*[Uu][Ee][0-9][Mm][0-9].*"]
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
extend-ignore-identifiers-re = ["NVML_*", ".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*",
".*[Tt]h[rR].*"]
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.default.extend-identifiers] [tool.typos.default.extend-identifiers]
bbc5b7ede = "bbc5b7ede" bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
# splitted_input = "splitted_input"
NOOPs = "NOOPs" NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut" nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin" cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel" depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d" pard_token = "pard_token"
depthwise_seperable_CNN = "depthwise_seperable_CNN" ptd_token_id = "ptd_token_id"
ser_de = "ser_de"
shared_memory_per_block_optin = "shared_memory_per_block_optin"
FoPE = "FoPE"
k_ot = "k_ot"
view_seperator = "view_seperator"
inverse_std_variences = "inverse_std_variences"
[tool.typos.default.extend-words] [tool.typos.default.extend-words]
iy = "iy" iy = "iy"
tendencias = "tendencias"
indx = "indx" indx = "indx"
# intel cpu features # intel cpu features
tme = "tme" tme = "tme"
dout = "dout" dout = "dout"
Pn = "Pn" Pn = "Pn"
arange = "arange" arange = "arange"
thw = "thw"
[tool.typos.type.py] subtile = "subtile"
extend-glob = [] HSA = "HSA"
extend-ignore-identifiers-re = [] setp = "setp"
extend-ignore-words-re = [] CPY = "CPY"
extend-ignore-re = [] thr = "thr"
Thr = "Thr"
[tool.typos.type.py.extend-identifiers] PARD = "PARD"
arange = "arange" pard = "pard"
NDArray = "NDArray" AKS = "AKS"
EOFError = "EOFError"
fo = "fo"
ba = "ba"
[tool.typos.type.py.extend-words]
ba = "ba" ba = "ba"
fo = "fo"
nd = "nd" nd = "nd"
eles = "eles"
[tool.typos.type.cpp] datas = "datas"
extend-glob = ["*.cu"]
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cpp.extend-identifiers]
countr_one = "countr_one"
k_ot = "k_ot"
ot = "ot"
[tool.typos.type.cpp.extend-words]
[tool.typos.type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.rust.extend-identifiers]
flate2 = "flate2"
[tool.typos.type.rust.extend-words]
ser = "ser" ser = "ser"
ure = "ure"
[tool.typos.type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.lock.extend-identifiers]
[tool.typos.type.lock.extend-words]
[tool.typos.type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.jl.extend-identifiers]
[tool.typos.type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[tool.typos.type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.go.extend-identifiers]
flate = "flate"
[tool.typos.type.go.extend-words]
[tool.typos.type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.css.extend-identifiers]
nd = "nd"
[tool.typos.type.css.extend-words]
[tool.typos.type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.man.extend-identifiers]
Nd = "Nd"
[tool.typos.type.man.extend-words]
[tool.typos.type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.cert.extend-identifiers]
[tool.typos.type.cert.extend-words]
[tool.typos.type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.sh.extend-identifiers]
ot = "ot"
[tool.typos.type.sh.extend-words]
[tool.typos.type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[tool.typos.type.vimscript.extend-identifiers]
windo = "windo"
[tool.typos.type.vimscript.extend-words]
[tool.uv] [tool.uv]
no-build-isolation-package = ["torch"] no-build-isolation-package = ["torch"]
\ No newline at end of file
...@@ -10,4 +10,3 @@ jinja2>=3.1.6 ...@@ -10,4 +10,3 @@ jinja2>=3.1.6
regex regex
build build
protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.*
grpcio-tools==1.78.0 # Required for grpc entrypoints
...@@ -12,7 +12,7 @@ tokenizers >= 0.21.1 # Required for fast incremental detokenization. ...@@ -12,7 +12,7 @@ tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994 protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp >= 3.13.3 aiohttp >= 3.13.3
openai >= 1.99.1 # For Responses API with reasoning content openai >= 1.99.1, < 2.25.0 # For Responses API with reasoning content
pydantic >= 2.12.0 pydantic >= 2.12.0
prometheus_client >= 0.18.0 prometheus_client >= 0.18.0
pillow # Required for image processing pillow # Required for image processing
...@@ -24,14 +24,14 @@ outlines_core == 0.2.11 ...@@ -24,14 +24,14 @@ outlines_core == 0.2.11
# required for outlines backend disk cache # required for outlines backend disk cache
diskcache == 5.6.3 diskcache == 5.6.3
lark == 1.2.2 lark == 1.2.2
xgrammar == 0.1.29; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le" xgrammar >= 0.1.32, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
typing_extensions >= 4.10 typing_extensions >= 4.10
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
partial-json-parser # used for parsing partial JSON outputs partial-json-parser # used for parsing partial JSON outputs
pyzmq >= 25.0.0 pyzmq >= 25.0.0
msgspec msgspec
gguf >= 0.17.0 gguf >= 0.17.0
mistral_common[image] >= 1.9.0 mistral_common[image] >= 1.10.0
opencv-python-headless >= 4.13.0 # required for video IO opencv-python-headless >= 4.13.0 # required for video IO
pyyaml pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
...@@ -51,5 +51,7 @@ openai-harmony >= 0.0.3 # Required for gpt-oss ...@@ -51,5 +51,7 @@ openai-harmony >= 0.0.3 # Required for gpt-oss
anthropic >= 0.71.0 anthropic >= 0.71.0
model-hosting-container-standards >= 0.1.13, < 1.0.0 model-hosting-container-standards >= 0.1.13, < 1.0.0
mcp mcp
grpcio opentelemetry-sdk >= 1.27.0
grpcio-reflection opentelemetry-api >= 1.27.0
\ No newline at end of file opentelemetry-exporter-otlp >= 1.27.0
opentelemetry-semantic-conventions-ai >= 0.4.1
...@@ -7,13 +7,13 @@ numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative d ...@@ -7,13 +7,13 @@ numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative d
# Dependencies for CPUs # Dependencies for CPUs
torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x" torch==2.10.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le" torch==2.10.0; platform_machine == "aarch64" or platform_system == "Darwin" or platform_machine == "ppc64le" or platform_machine == "riscv64"
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
torchaudio; platform_machine != "s390x" torchaudio; platform_machine != "s390x" and platform_machine != "riscv64"
# required for the image processor of phi3v, this must be updated alongside torch # required for the image processor of phi3v, this must be updated alongside torch
torchvision; platform_machine != "s390x" torchvision; platform_machine != "s390x" and platform_machine != "riscv64"
# Intel Extension for PyTorch, only for x86_64 CPUs # Intel Extension for PyTorch, only for x86_64 CPUs
intel-openmp==2024.2.1; platform_machine == "x86_64" intel-openmp==2024.2.1; platform_machine == "x86_64"
......
...@@ -4,10 +4,16 @@ ...@@ -4,10 +4,16 @@
numba == 0.61.2 # Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
# Dependencies for NVIDIA GPUs # Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0
torch==2.10.0 torch==2.10.0
torchaudio==2.10.0 torchaudio==2.10.0
# These must be updated alongside torch # These must be updated alongside torch
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# FlashInfer should be updated together with the Dockerfile # FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.6.3 flashinfer-python==0.6.6
# Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
# breaking changes in 1.19.0
nvidia-cudnn-frontend>=1.13.0,<1.19.0
# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
nvidia-cutlass-dsl>=4.4.0.dev1
quack-kernels>=0.2.7
mkdocs mkdocs<2.0.0
mkdocs-api-autonav mkdocs-api-autonav
mkdocs-material mkdocs-material
mkdocstrings-python mkdocstrings-python
...@@ -7,6 +7,7 @@ mkdocs-awesome-nav ...@@ -7,6 +7,7 @@ mkdocs-awesome-nav
mkdocs-glightbox mkdocs-glightbox
mkdocs-git-revision-date-localized-plugin mkdocs-git-revision-date-localized-plugin
mkdocs-minify-plugin mkdocs-minify-plugin
mkdocs-redirects
regex regex
ruff ruff
pydantic pydantic
......
lmcache >= 0.3.9 lmcache >= 0.3.9
nixl >= 0.7.1 # Required for disaggregated prefill nixl >= 0.7.1, < 0.10.0 # Required for disaggregated prefill
mooncake-transfer-engine >= 0.3.8
# formatting # formatting
pre-commit==4.0.1 pre-commit>=4.5.1
...@@ -23,17 +23,17 @@ jiwer # required for audio tests ...@@ -23,17 +23,17 @@ jiwer # required for audio tests
timm # required for internvl test timm # required for internvl test
transformers_stream_generator # required for qwen-vl test transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.9.0 # required for voxtral test mistral_common[image,audio] >= 1.9.1 # required for voxtral test
num2words # required for smolvlm test num2words # required for smolvlm test
opencv-python-headless >= 4.13.0 # required for video test opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.9.2 # required for model evaluation test lm-eval[api]>=0.4.11 # required for model evaluation test
mteb>=1.38.11, <2 # required for mteb test mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5 transformers==4.57.5
tokenizers==0.22.0 tokenizers==0.22.0
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
# quantization # quantization
bitsandbytes>=0.46.1 bitsandbytes>=0.49.2
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
...@@ -42,6 +42,7 @@ tritonclient>=2.51.0 ...@@ -42,6 +42,7 @@ tritonclient>=2.51.0
numba == 0.61.2 # Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numpy numpy
runai-model-streamer[s3,gcs]==0.15.3 runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2 fastsafetensors>=0.2.2
instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13 pydantic>=2.12 # 2.11 leads to error on python 3.13
# Common dependencies # Common dependencies
-r common.txt -r common.txt
--extra-index-url https://download.pytorch.org/whl/test/rocm7.0 --extra-index-url https://download.pytorch.org/whl/rocm7.1
torch==2.10.0 torch==2.10.0
torchvision==0.25.0 torchvision==0.25.0
torchaudio==2.10.0 torchaudio==2.10.0
...@@ -12,5 +12,5 @@ setuptools>=77.0.3,<80.0.0 ...@@ -12,5 +12,5 @@ setuptools>=77.0.3,<80.0.0
setuptools-scm>=8 setuptools-scm>=8
wheel wheel
jinja2>=3.1.6 jinja2>=3.1.6
amdsmi==6.4.3 amdsmi==7.0.2
timm>=1.0.17 timm>=1.0.17
...@@ -45,6 +45,8 @@ pystemmer==3.0.0 ...@@ -45,6 +45,8 @@ pystemmer==3.0.0
# via mteb # via mteb
# Multi-modal processing # Multi-modal processing
av==16.1.0
# required for audio_in_video tests
blobfile==3.0.0 blobfile==3.0.0
# Multi-Modal Models Test # Multi-Modal Models Test
decord==0.6.0 decord==0.6.0
...@@ -58,7 +60,7 @@ schemathesis==3.39.15 ...@@ -58,7 +60,7 @@ schemathesis==3.39.15
# OpenAI schema test # OpenAI schema test
# Evaluation and benchmarking # Evaluation and benchmarking
lm-eval[api]==0.4.9.2 lm-eval[api]==0.4.11
jiwer==4.0.0 jiwer==4.0.0
# Required for multiprocessed tests that use spawn method, Datasets and Evaluate Test # Required for multiprocessed tests that use spawn method, Datasets and Evaluate Test
...@@ -67,12 +69,10 @@ multiprocess==0.70.16 ...@@ -67,12 +69,10 @@ multiprocess==0.70.16
# Required for v1/metrics/test_engine_logger_apis.py # Required for v1/metrics/test_engine_logger_apis.py
ray[cgraph,default]>=2.48.0 ray[cgraph,default]>=2.48.0
# Plugins test
terratorch @ git+https://github.com/IBM/terratorch.git@07184fcf91a1324f831ff521dd238d97fe350e3e
torchgeo==0.7.0 torchgeo==0.7.0
# via terratorch # via terratorch
# MTEB Benchmark Test # MTEB Benchmark Test
mteb==2.1.2 mteb[bm25s]>=2, <3
# Utilities # Utilities
num2words==0.5.14 num2words==0.5.14
...@@ -93,6 +93,22 @@ timm==1.0.17 ...@@ -93,6 +93,22 @@ timm==1.0.17
# Required for plugins test # Required for plugins test
albumentations==1.4.6 albumentations==1.4.6
# Pin transformers version # Pin transformers version
transformers==4.57.3 transformers==4.57.5
# Pin HF Hub version # Pin HF Hub version
huggingface-hub==0.36.2 huggingface-hub==0.36.2
# Pin Mistral Common
mistral-common[image,audio]==1.10.0
# Required for Prithvi tests
terratorch==1.2.2
# Required for Prithvi tests
segmentation-models-pytorch==0.5.0
# Required for Prithvi tests
imagehash==4.3.2
# Required for bitsandbytes quantization test
bitsandbytes==0.49.2
# Examples (tensorizer) tests
tensorizer==2.10.1
# Multi-modal models test (`allendou/FireRedASR2-LLM-vllm`)
kaldi-native-fbank==1.22.3
# Pinning numpy version
numpy==2.2.6
# Common dependencies # Common dependencies
-r common.txt -r common.txt
# The version of gRPC libraries should be consistent with each other
grpcio==1.78.0
grpcio-reflection==1.78.0
numba == 0.61.2 # Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
# Dependencies for AMD GPUs # Dependencies for AMD GPUs
datasets datasets
ray[cgraph]>=2.48.0
peft peft
pytest-asyncio pytest-asyncio
tensorizer==2.10.1 tensorizer==2.10.1
packaging>=24.2 packaging>=24.2
setuptools>=77.0.3,<80.0.0 setuptools>=77.0.3,<80.0.0
setuptools-scm>=8 setuptools-scm>=8
runai-model-streamer[s3,gcs,azure]==0.15.7
runai-model-streamer[s3,gcs]==0.15.3
# conch-triton-kernels==1.2.1 # conch-triton-kernels==1.2.1
timm>=1.0.17 timm>=1.0.17
grpcio-tools==1.78.0 # Should match `build.txt` # amd-quark: required for Quark quantization on ROCm
\ No newline at end of file # To be consistent with test_quark.py
amd-quark>=0.8.99
\ No newline at end of file
...@@ -10,6 +10,7 @@ pytest-cov ...@@ -10,6 +10,7 @@ pytest-cov
# testing utils # testing utils
albumentations # required for Nemotron Parse in test_common.py albumentations # required for Nemotron Parse in test_common.py
av # required for audio_in_video tests
backoff # required for phi4mm test backoff # required for phi4mm test
blobfile # required for kimi-vl test blobfile # required for kimi-vl test
einops # required for MPT, qwen-vl einops # required for MPT, qwen-vl
...@@ -30,33 +31,48 @@ torchaudio==2.10.0 ...@@ -30,33 +31,48 @@ torchaudio==2.10.0
torchvision==0.25.0 torchvision==0.25.0
transformers_stream_generator # required for qwen-vl test transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.9.0 # required for voxtral test mistral_common[image,audio] >= 1.9.1 # required for voxtral test
num2words # required for smolvlm test num2words # required for smolvlm test
open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
opencv-python-headless >= 4.13.0 # required for video test opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.9.2 # required for model evaluation test lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5 transformers==4.57.5
tokenizers==0.22.0 tokenizers==0.22.0
schemathesis>=3.39.15 # Required for openai schema test. schemathesis>=3.39.15 # Required for openai schema test.
# quantization # quantization
bitsandbytes==0.46.1 bitsandbytes==0.49.2
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
genai_perf>=0.0.8 genai_perf>=0.0.8
tritonclient>=2.51.0 tritonclient>=2.51.0
grpcio-tools==1.78.0 # Should match `build.txt` # The version of gRPC libraries should be consistent with each other
grpcio==1.78.0
grpcio-reflection==1.78.0
arctic-inference == 0.1.1 # Required for suffix decoding test arctic-inference == 0.1.1 # Required for suffix decoding test
numba == 0.61.2 # Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numpy numpy
runai-model-streamer[s3,gcs]==0.15.3 runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13 pydantic>=2.12 # 2.11 leads to error on python 3.13
decord==0.6.0 decord==0.6.0
terratorch @ git+https://github.com/IBM/terratorch.git@1.1.rc3 # required for PrithviMAE test terratorch >= 1.2.2 # Required for Prithvi tests
imagehash # Required for Prithvi tests
segmentation-models-pytorch > 0.4.0 # Required for Prithvi tests
gpt-oss >= 0.0.7; python_version > '3.11' gpt-oss >= 0.0.7; python_version > '3.11'
perceptron # required for isaac test perceptron # required for isaac test
kaldi-native-fbank >= 1.18.7 # required for fireredasr2 test
# Newer versions of datasets require torchcodec, which makes the tests fail in CI because of a missing library.
# Older versions conflict with terratorch's requirements.
datasets>=3.3.0,<=3.6.0
openpyxl # required for perf comparison excel report
plotly # required for perf comparison html report
# This file was autogenerated by uv via the following command: # This file was autogenerated by uv via the following command:
# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu129 --python-platform x86_64-manylinux_2_28 --python-version 3.12 # uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu129 --python-platform x86_64-manylinux_2_28 --python-version 3.12
absl-py==2.1.0 absl-py==2.1.0
# via rouge-score
accelerate==1.0.1
# via # via
# lm-eval # rouge-score
# peft # tensorboard
accelerate==1.0.1
# via peft
aenum==3.1.16 aenum==3.1.16
# via lightly # via lightly
affine==2.4.0 affine==2.4.0
...@@ -31,9 +31,7 @@ albumentations==1.4.6 ...@@ -31,9 +31,7 @@ albumentations==1.4.6
# -r requirements/test.in # -r requirements/test.in
# terratorch # terratorch
alembic==1.16.4 alembic==1.16.4
# via # via optuna
# mlflow
# optuna
annotated-doc==0.0.4 annotated-doc==0.0.4
# via fastapi # via fastapi
annotated-types==0.7.0 annotated-types==0.7.0
...@@ -64,18 +62,26 @@ attrs==24.2.0 ...@@ -64,18 +62,26 @@ attrs==24.2.0
# referencing # referencing
audioread==3.0.1 audioread==3.0.1
# via librosa # via librosa
av==16.1.0
# via -r requirements/test.in
azure-core==1.38.2
# via
# azure-identity
# azure-storage-blob
azure-identity==1.25.2
# via runai-model-streamer-azure
azure-storage-blob==12.28.0
# via runai-model-streamer-azure
backoff==2.2.1 backoff==2.2.1
# via # via
# -r requirements/test.in # -r requirements/test.in
# schemathesis # schemathesis
bitsandbytes==0.46.1 bitsandbytes==0.49.2
# via # via
# -r requirements/test.in # -r requirements/test.in
# lightning # lightning
black==24.10.0 black==24.10.0
# via datamodel-code-generator # via datamodel-code-generator
blinker==1.9.0
# via flask
blobfile==3.0.0 blobfile==3.0.0
# via -r requirements/test.in # via -r requirements/test.in
bm25s==0.2.13 bm25s==0.2.13
...@@ -93,9 +99,7 @@ bounded-pool-executor==0.0.3 ...@@ -93,9 +99,7 @@ bounded-pool-executor==0.0.3
buildkite-test-collector==0.1.9 buildkite-test-collector==0.1.9
# via -r requirements/test.in # via -r requirements/test.in
cachetools==5.5.2 cachetools==5.5.2
# via # via google-auth
# google-auth
# mlflow-skinny
certifi==2024.8.30 certifi==2024.8.30
# via # via
# fiona # fiona
...@@ -106,8 +110,11 @@ certifi==2024.8.30 ...@@ -106,8 +110,11 @@ certifi==2024.8.30
# pyproj # pyproj
# rasterio # rasterio
# requests # requests
cffi==1.17.1 # sentry-sdk
# via soundfile cffi==2.0.0
# via
# cryptography
# soundfile
chardet==5.2.0 chardet==5.2.0
# via mbstrdecoder # via mbstrdecoder
charset-normalizer==3.4.0 charset-normalizer==3.4.0
...@@ -120,15 +127,14 @@ click==8.1.7 ...@@ -120,15 +127,14 @@ click==8.1.7
# click-plugins # click-plugins
# cligj # cligj
# fiona # fiona
# flask
# jiwer # jiwer
# mlflow-skinny
# nltk # nltk
# rasterio # rasterio
# ray # ray
# schemathesis # schemathesis
# typer # typer
# uvicorn # uvicorn
# wandb
click-plugins==1.1.1.2 click-plugins==1.1.1.2
# via # via
# fiona # fiona
...@@ -137,14 +143,11 @@ cligj==0.7.2 ...@@ -137,14 +143,11 @@ cligj==0.7.2
# via # via
# fiona # fiona
# rasterio # rasterio
cloudpickle==3.1.1
# via mlflow-skinny
colorama==0.4.6 colorama==0.4.6
# via # via
# perceptron # perceptron
# sacrebleu # sacrebleu
# schemathesis # schemathesis
# tqdm-multiprocess
colorful==0.5.6 colorful==0.5.6
# via ray # via ray
colorlog==6.10.1 colorlog==6.10.1
...@@ -155,6 +158,12 @@ coverage==7.10.6 ...@@ -155,6 +158,12 @@ coverage==7.10.6
# via pytest-cov # via pytest-cov
cramjam==2.9.0 cramjam==2.9.0
# via fastparquet # via fastparquet
cryptography==46.0.5
# via
# azure-identity
# azure-storage-blob
# msal
# pyjwt
cuda-bindings==12.9.4 cuda-bindings==12.9.4
# via torch # via torch
cuda-pathfinder==1.3.3 cuda-pathfinder==1.3.3
...@@ -163,16 +172,15 @@ cupy-cuda12x==13.6.0 ...@@ -163,16 +172,15 @@ cupy-cuda12x==13.6.0
# via ray # via ray
cycler==0.12.1 cycler==0.12.1
# via matplotlib # via matplotlib
databricks-sdk==0.59.0
# via mlflow-skinny
datamodel-code-generator==0.26.3 datamodel-code-generator==0.26.3
# via -r requirements/test.in # via -r requirements/test.in
dataproperty==1.0.1 dataproperty==1.0.1
# via # via
# pytablewriter # pytablewriter
# tabledata # tabledata
datasets==3.0.2 datasets==3.3.0
# via # via
# -r requirements/test.in
# evaluate # evaluate
# lm-eval # lm-eval
# mteb # mteb
...@@ -180,6 +188,8 @@ decorator==5.1.1 ...@@ -180,6 +188,8 @@ decorator==5.1.1
# via librosa # via librosa
decord==0.6.0 decord==0.6.0
# via -r requirements/test.in # via -r requirements/test.in
diffusers==0.36.0
# via terratorch
dill==0.3.8 dill==0.3.8
# via # via
# datasets # datasets
...@@ -191,15 +201,11 @@ distlib==0.3.9 ...@@ -191,15 +201,11 @@ distlib==0.3.9
dnspython==2.7.0 dnspython==2.7.0
# via email-validator # via email-validator
docker==7.1.0 docker==7.1.0
# via # via gpt-oss
# gpt-oss
# mlflow
docopt==0.6.2 docopt==0.6.2
# via num2words # via num2words
docstring-parser==0.17.0 docstring-parser==0.17.0
# via jsonargparse # via jsonargparse
efficientnet-pytorch==0.7.1
# via segmentation-models-pytorch
einops==0.8.1 einops==0.8.1
# via # via
# -r requirements/test.in # -r requirements/test.in
...@@ -214,12 +220,12 @@ email-validator==2.2.0 ...@@ -214,12 +220,12 @@ email-validator==2.2.0
# via pydantic # via pydantic
encodec==0.1.1 encodec==0.1.1
# via vocos # via vocos
et-xmlfile==2.0.0
# via openpyxl
evaluate==0.4.3 evaluate==0.4.3
# via lm-eval # via lm-eval
fastapi==0.128.0 fastapi==0.128.0
# via # via gpt-oss
# gpt-oss
# mlflow-skinny
fastparquet==2024.11.0 fastparquet==2024.11.0
# via genai-perf # via genai-perf
fastrlock==0.8.2 fastrlock==0.8.2
...@@ -230,6 +236,7 @@ filelock==3.16.1 ...@@ -230,6 +236,7 @@ filelock==3.16.1
# via # via
# blobfile # blobfile
# datasets # datasets
# diffusers
# huggingface-hub # huggingface-hub
# ray # ray
# torch # torch
...@@ -237,8 +244,6 @@ filelock==3.16.1 ...@@ -237,8 +244,6 @@ filelock==3.16.1
# virtualenv # virtualenv
fiona==1.10.1 fiona==1.10.1
# via torchgeo # via torchgeo
flask==3.1.1
# via mlflow
fonttools==4.55.0 fonttools==4.55.0
# via matplotlib # via matplotlib
fqdn==1.5.1 fqdn==1.5.1
...@@ -249,7 +254,7 @@ frozenlist==1.5.0 ...@@ -249,7 +254,7 @@ frozenlist==1.5.0
# via # via
# aiohttp # aiohttp
# aiosignal # aiosignal
fsspec==2024.9.0 fsspec==2024.12.0
# via # via
# datasets # datasets
# evaluate # evaluate
...@@ -257,6 +262,7 @@ fsspec==2024.9.0 ...@@ -257,6 +262,7 @@ fsspec==2024.9.0
# huggingface-hub # huggingface-hub
# lightning # lightning
# pytorch-lightning # pytorch-lightning
# tacoreader
# torch # torch
ftfy==6.3.1 ftfy==6.3.1
# via open-clip-torch # via open-clip-torch
...@@ -269,7 +275,7 @@ geopandas==1.0.1 ...@@ -269,7 +275,7 @@ geopandas==1.0.1
gitdb==4.0.12 gitdb==4.0.12
# via gitpython # via gitpython
gitpython==3.1.44 gitpython==3.1.44
# via mlflow-skinny # via wandb
google-api-core==2.24.2 google-api-core==2.24.2
# via # via
# google-cloud-core # google-cloud-core
...@@ -277,7 +283,6 @@ google-api-core==2.24.2 ...@@ -277,7 +283,6 @@ google-api-core==2.24.2
# opencensus # opencensus
google-auth==2.40.2 google-auth==2.40.2
# via # via
# databricks-sdk
# google-api-core # google-api-core
# google-cloud-core # google-cloud-core
# google-cloud-storage # google-cloud-storage
...@@ -296,25 +301,18 @@ googleapis-common-protos==1.70.0 ...@@ -296,25 +301,18 @@ googleapis-common-protos==1.70.0
# via google-api-core # via google-api-core
gpt-oss==0.0.8 gpt-oss==0.0.8
# via -r requirements/test.in # via -r requirements/test.in
graphene==3.4.3
# via mlflow
graphql-core==3.2.6 graphql-core==3.2.6
# via # via hypothesis-graphql
# graphene
# graphql-relay
# hypothesis-graphql
graphql-relay==3.2.0
# via graphene
greenlet==3.2.3 greenlet==3.2.3
# via sqlalchemy # via sqlalchemy
grpcio==1.78.0 grpcio==1.78.0
# via # via
# grpcio-tools # -r requirements/test.in
# grpcio-reflection
# ray # ray
grpcio-tools==1.78.0 # tensorboard
grpcio-reflection==1.78.0
# via -r requirements/test.in # via -r requirements/test.in
gunicorn==23.0.0
# via mlflow
h11==0.14.0 h11==0.14.0
# via # via
# httpcore # httpcore
...@@ -338,12 +336,14 @@ httpcore==1.0.6 ...@@ -338,12 +336,14 @@ httpcore==1.0.6
httpx==0.27.2 httpx==0.27.2
# via # via
# -r requirements/test.in # -r requirements/test.in
# diffusers
# perceptron # perceptron
# schemathesis # schemathesis
huggingface-hub==0.36.2 huggingface-hub==0.36.2
# via # via
# accelerate # accelerate
# datasets # datasets
# diffusers
# evaluate # evaluate
# open-clip-torch # open-clip-torch
# peft # peft
...@@ -379,11 +379,13 @@ idna==3.10 ...@@ -379,11 +379,13 @@ idna==3.10
# jsonschema # jsonschema
# requests # requests
# yarl # yarl
imagehash==4.3.2
# via -r requirements/test.in
imageio==2.37.0 imageio==2.37.0
# via scikit-image # via scikit-image
importlib-metadata==8.7.0 importlib-metadata==8.7.0
# via # via
# mlflow-skinny # diffusers
# opentelemetry-api # opentelemetry-api
importlib-resources==6.5.2 importlib-resources==6.5.2
# via typeshed-client # via typeshed-client
...@@ -391,18 +393,19 @@ inflect==5.6.2 ...@@ -391,18 +393,19 @@ inflect==5.6.2
# via datamodel-code-generator # via datamodel-code-generator
iniconfig==2.0.0 iniconfig==2.0.0
# via pytest # via pytest
instanttensor==0.1.5
# via -r requirements/test.in
isodate==0.7.2
# via azure-storage-blob
isoduration==20.11.0 isoduration==20.11.0
# via jsonschema # via jsonschema
isort==5.13.2 isort==5.13.2
# via datamodel-code-generator # via datamodel-code-generator
itsdangerous==2.2.0
# via flask
jinja2==3.1.6 jinja2==3.1.6
# via # via
# datamodel-code-generator # datamodel-code-generator
# flask
# genai-perf # genai-perf
# mlflow # lm-eval
# torch # torch
jiwer==3.0.5 jiwer==3.0.5
# via -r requirements/test.in # via -r requirements/test.in
...@@ -415,12 +418,14 @@ joblib==1.4.2 ...@@ -415,12 +418,14 @@ joblib==1.4.2
# librosa # librosa
# nltk # nltk
# scikit-learn # scikit-learn
jsonargparse==4.35.0 jsonargparse==4.46.0
# via # via
# lightning # lightning
# terratorch # terratorch
jsonlines==4.0.0 jsonlines==4.0.0
# via lm-eval # via lm-eval
jsonnet==0.21.0
# via jsonargparse
jsonpointer==3.0.0 jsonpointer==3.0.0
# via jsonschema # via jsonschema
jsonschema==4.23.0 jsonschema==4.23.0
...@@ -433,6 +438,8 @@ jsonschema-specifications==2024.10.1 ...@@ -433,6 +438,8 @@ jsonschema-specifications==2024.10.1
# via jsonschema # via jsonschema
junit-xml==1.9 junit-xml==1.9
# via schemathesis # via schemathesis
kaldi-native-fbank==1.22.3
# via -r requirements/test.in
kaleido==0.2.1 kaleido==0.2.1
# via genai-perf # via genai-perf
kiwisolver==1.4.7 kiwisolver==1.4.7
...@@ -449,13 +456,13 @@ libnacl==2.1.0 ...@@ -449,13 +456,13 @@ libnacl==2.1.0
# via tensorizer # via tensorizer
librosa==0.10.2.post1 librosa==0.10.2.post1
# via -r requirements/test.in # via -r requirements/test.in
lightly==1.5.20 lightly==1.5.22
# via # via
# terratorch # terratorch
# torchgeo # torchgeo
lightly-utils==0.0.2 lightly-utils==0.0.2
# via lightly # via lightly
lightning==2.5.1.post0 lightning==2.6.1
# via # via
# terratorch # terratorch
# torchgeo # torchgeo
...@@ -466,7 +473,7 @@ lightning-utilities==0.14.3 ...@@ -466,7 +473,7 @@ lightning-utilities==0.14.3
# torchmetrics # torchmetrics
llvmlite==0.44.0 llvmlite==0.44.0
# via numba # via numba
lm-eval==0.4.9.2 lm-eval==0.4.11
# via -r requirements/test.in # via -r requirements/test.in
lxml==5.3.0 lxml==5.3.0
# via # via
...@@ -476,12 +483,11 @@ lxml==5.3.0 ...@@ -476,12 +483,11 @@ lxml==5.3.0
mako==1.3.10 mako==1.3.10
# via alembic # via alembic
markdown==3.8.2 markdown==3.8.2
# via mlflow # via tensorboard
markdown-it-py==3.0.0 markdown-it-py==3.0.0
# via rich # via rich
markupsafe==3.0.1 markupsafe==3.0.1
# via # via
# flask
# jinja2 # jinja2
# mako # mako
# werkzeug # werkzeug
...@@ -489,7 +495,6 @@ matplotlib==3.9.2 ...@@ -489,7 +495,6 @@ matplotlib==3.9.2
# via # via
# -r requirements/test.in # -r requirements/test.in
# lightning # lightning
# mlflow
# pycocotools # pycocotools
# torchgeo # torchgeo
mbstrdecoder==1.1.3 mbstrdecoder==1.1.3
...@@ -499,21 +504,23 @@ mbstrdecoder==1.1.3 ...@@ -499,21 +504,23 @@ mbstrdecoder==1.1.3
# typepy # typepy
mdurl==0.1.2 mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
mistral-common==1.9.0 mistral-common==1.10.0
# via -r requirements/test.in # via -r requirements/test.in
mlflow==2.22.0
# via terratorch
mlflow-skinny==2.22.0
# via mlflow
more-itertools==10.5.0 more-itertools==10.5.0
# via lm-eval # via lm-eval
mpmath==1.3.0 mpmath==1.3.0
# via sympy # via sympy
msal==1.34.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.3.1
# via azure-identity
msgpack==1.1.0 msgpack==1.1.0
# via # via
# librosa # librosa
# ray # ray
mteb==2.1.2 mteb==2.8.3
# via -r requirements/test.in # via -r requirements/test.in
multidict==6.1.0 multidict==6.1.0
# via # via
...@@ -523,8 +530,6 @@ multiprocess==0.70.16 ...@@ -523,8 +530,6 @@ multiprocess==0.70.16
# via # via
# datasets # datasets
# evaluate # evaluate
munch==4.0.0
# via pretrainedmodels
mypy-extensions==1.0.0 mypy-extensions==1.0.0
# via black # via black
networkx==3.2.1 networkx==3.2.1
...@@ -539,8 +544,6 @@ numba==0.61.2 ...@@ -539,8 +544,6 @@ numba==0.61.2
# via # via
# -r requirements/test.in # -r requirements/test.in
# librosa # librosa
numexpr==2.10.1
# via lm-eval
numpy==2.2.6 numpy==2.2.6
# via # via
# -r requirements/test.in # -r requirements/test.in
...@@ -553,6 +556,7 @@ numpy==2.2.6 ...@@ -553,6 +556,7 @@ numpy==2.2.6
# cupy-cuda12x # cupy-cuda12x
# datasets # datasets
# decord # decord
# diffusers
# einx # einx
# encodec # encodec
# evaluate # evaluate
...@@ -560,16 +564,16 @@ numpy==2.2.6 ...@@ -560,16 +564,16 @@ numpy==2.2.6
# genai-perf # genai-perf
# geopandas # geopandas
# h5py # h5py
# imagehash
# imageio # imageio
# librosa # librosa
# lightly # lightly
# lightly-utils # lightly-utils
# lm-eval
# matplotlib # matplotlib
# mistral-common # mistral-common
# mlflow
# mteb # mteb
# numba # numba
# numexpr
# opencv-python-headless # opencv-python-headless
# optuna # optuna
# pandas # pandas
...@@ -578,6 +582,7 @@ numpy==2.2.6 ...@@ -578,6 +582,7 @@ numpy==2.2.6
# perceptron # perceptron
# pycocotools # pycocotools
# pyogrio # pyogrio
# pywavelets
# rasterio # rasterio
# rioxarray # rioxarray
# rouge-score # rouge-score
...@@ -590,8 +595,10 @@ numpy==2.2.6 ...@@ -590,8 +595,10 @@ numpy==2.2.6
# shapely # shapely
# soxr # soxr
# statsmodels # statsmodels
# tensorboard
# tensorboardx # tensorboardx
# tensorizer # tensorizer
# terratorch
# tifffile # tifffile
# torchgeo # torchgeo
# torchmetrics # torchmetrics
...@@ -657,9 +664,10 @@ opencv-python-headless==4.13.0.90 ...@@ -657,9 +664,10 @@ opencv-python-headless==4.13.0.90
# albucore # albucore
# albumentations # albumentations
# mistral-common # mistral-common
openpyxl==3.1.5
# via -r requirements/test.in
opentelemetry-api==1.35.0 opentelemetry-api==1.35.0
# via # via
# mlflow-skinny
# opentelemetry-exporter-prometheus # opentelemetry-exporter-prometheus
# opentelemetry-sdk # opentelemetry-sdk
# opentelemetry-semantic-conventions # opentelemetry-semantic-conventions
...@@ -669,7 +677,6 @@ opentelemetry-proto==1.36.0 ...@@ -669,7 +677,6 @@ opentelemetry-proto==1.36.0
# via ray # via ray
opentelemetry-sdk==1.35.0 opentelemetry-sdk==1.35.0
# via # via
# mlflow-skinny
# opentelemetry-exporter-prometheus # opentelemetry-exporter-prometheus
# ray # ray
opentelemetry-semantic-conventions==0.56b0 opentelemetry-semantic-conventions==0.56b0
...@@ -681,13 +688,13 @@ orjson==3.11.5 ...@@ -681,13 +688,13 @@ orjson==3.11.5
packaging==24.2 packaging==24.2
# via # via
# accelerate # accelerate
# bitsandbytes
# black # black
# datamodel-code-generator # datamodel-code-generator
# datasets # datasets
# evaluate # evaluate
# fastparquet # fastparquet
# geopandas # geopandas
# gunicorn
# huggingface-hub # huggingface-hub
# hydra-core # hydra-core
# kornia # kornia
...@@ -695,7 +702,6 @@ packaging==24.2 ...@@ -695,7 +702,6 @@ packaging==24.2
# lightning # lightning
# lightning-utilities # lightning-utilities
# matplotlib # matplotlib
# mlflow-skinny
# optuna # optuna
# peft # peft
# plotly # plotly
...@@ -708,10 +714,12 @@ packaging==24.2 ...@@ -708,10 +714,12 @@ packaging==24.2
# rioxarray # rioxarray
# scikit-image # scikit-image
# statsmodels # statsmodels
# tensorboard
# tensorboardx # tensorboardx
# torchmetrics # torchmetrics
# transformers # transformers
# typepy # typepy
# wandb
# xarray # xarray
pandas==2.2.3 pandas==2.2.3
# via # via
...@@ -720,8 +728,8 @@ pandas==2.2.3 ...@@ -720,8 +728,8 @@ pandas==2.2.3
# fastparquet # fastparquet
# genai-perf # genai-perf
# geopandas # geopandas
# mlflow
# statsmodels # statsmodels
# tacoreader
# torchgeo # torchgeo
# xarray # xarray
pathspec==0.12.1 pathspec==0.12.1
...@@ -731,16 +739,16 @@ pathvalidate==3.2.1 ...@@ -731,16 +739,16 @@ pathvalidate==3.2.1
patsy==1.0.1 patsy==1.0.1
# via statsmodels # via statsmodels
peft==0.16.0 peft==0.16.0
# via # via -r requirements/test.in
# -r requirements/test.in
# lm-eval
perceptron==0.1.4 perceptron==0.1.4
# via -r requirements/test.in # via -r requirements/test.in
perf-analyzer==0.1.0 perf-analyzer==0.1.0
# via genai-perf # via genai-perf
pillow==10.4.0 pillow==10.4.0
# via # via
# diffusers
# genai-perf # genai-perf
# imagehash
# imageio # imageio
# lightly-utils # lightly-utils
# matplotlib # matplotlib
...@@ -748,6 +756,7 @@ pillow==10.4.0 ...@@ -748,6 +756,7 @@ pillow==10.4.0
# perceptron # perceptron
# scikit-image # scikit-image
# segmentation-models-pytorch # segmentation-models-pytorch
# tensorboard
# torchgeo # torchgeo
# torchvision # torchvision
platformdirs==4.3.6 platformdirs==4.3.6
...@@ -755,8 +764,11 @@ platformdirs==4.3.6 ...@@ -755,8 +764,11 @@ platformdirs==4.3.6
# black # black
# pooch # pooch
# virtualenv # virtualenv
# wandb
plotly==5.24.1 plotly==5.24.1
# via genai-perf # via
# -r requirements/test.in
# genai-perf
pluggy==1.5.0 pluggy==1.5.0
# via # via
# pytest # pytest
...@@ -769,8 +781,6 @@ portalocker==2.10.1 ...@@ -769,8 +781,6 @@ portalocker==2.10.1
# via sacrebleu # via sacrebleu
pqdm==0.2.0 pqdm==0.2.0
# via -r requirements/test.in # via -r requirements/test.in
pretrainedmodels==0.7.4
# via segmentation-models-pytorch
prometheus-client==0.22.0 prometheus-client==0.22.0
# via # via
# opentelemetry-exporter-prometheus # opentelemetry-exporter-prometheus
...@@ -785,13 +795,14 @@ protobuf==6.33.2 ...@@ -785,13 +795,14 @@ protobuf==6.33.2
# via # via
# google-api-core # google-api-core
# googleapis-common-protos # googleapis-common-protos
# grpcio-tools # grpcio-reflection
# mlflow-skinny
# opentelemetry-proto # opentelemetry-proto
# proto-plus # proto-plus
# ray # ray
# tensorboard
# tensorboardx # tensorboardx
# tensorizer # tensorizer
# wandb
psutil==6.1.0 psutil==6.1.0
# via # via
# accelerate # accelerate
...@@ -801,19 +812,18 @@ py==1.11.0 ...@@ -801,19 +812,18 @@ py==1.11.0
# via pytest-forked # via pytest-forked
py-spy==0.4.0 py-spy==0.4.0
# via ray # via ray
pyarrow==18.0.0 pyarrow==23.0.0
# via # via
# datasets # datasets
# genai-perf # genai-perf
# mlflow # tacoreader
# terratorch
pyasn1==0.6.1 pyasn1==0.6.1
# via # via
# pyasn1-modules # pyasn1-modules
# rsa # rsa
pyasn1-modules==0.4.2 pyasn1-modules==0.4.2
# via google-auth # via google-auth
pybind11==2.13.6
# via lm-eval
pycocotools==2.0.8 pycocotools==2.0.8
# via terratorch # via terratorch
pycountry==24.6.1 pycountry==24.6.1
...@@ -831,17 +841,19 @@ pydantic==2.12.0 ...@@ -831,17 +841,19 @@ pydantic==2.12.0
# gpt-oss # gpt-oss
# lightly # lightly
# mistral-common # mistral-common
# mlflow-skinny
# mteb # mteb
# openai-harmony # openai-harmony
# pydantic-extra-types # pydantic-extra-types
# ray # ray
# wandb
pydantic-core==2.41.1 pydantic-core==2.41.1
# via pydantic # via pydantic
pydantic-extra-types==2.10.5 pydantic-extra-types==2.10.5
# via mistral-common # via mistral-common
pygments==2.18.0 pygments==2.18.0
# via rich # via rich
pyjwt==2.11.0
# via msal
pyogrio==0.11.0 pyogrio==0.11.0
# via geopandas # via geopandas
pyparsing==3.2.0 pyparsing==3.2.0
...@@ -873,7 +885,6 @@ pytest==8.3.5 ...@@ -873,7 +885,6 @@ pytest==8.3.5
# pytest-subtests # pytest-subtests
# pytest-timeout # pytest-timeout
# schemathesis # schemathesis
# terratorch
pytest-asyncio==0.24.0 pytest-asyncio==0.24.0
# via -r requirements/test.in # via -r requirements/test.in
pytest-cov==6.3.0 pytest-cov==6.3.0
...@@ -896,7 +907,6 @@ python-dateutil==2.9.0.post0 ...@@ -896,7 +907,6 @@ python-dateutil==2.9.0.post0
# via # via
# arrow # arrow
# botocore # botocore
# graphene
# lightly # lightly
# matplotlib # matplotlib
# pandas # pandas
...@@ -913,6 +923,8 @@ pytz==2024.2 ...@@ -913,6 +923,8 @@ pytz==2024.2
# via # via
# pandas # pandas
# typepy # typepy
pywavelets==1.9.0
# via imagehash
pyyaml==6.0.2 pyyaml==6.0.2
# via # via
# accelerate # accelerate
...@@ -923,7 +935,6 @@ pyyaml==6.0.2 ...@@ -923,7 +935,6 @@ pyyaml==6.0.2
# huggingface-hub # huggingface-hub
# jsonargparse # jsonargparse
# lightning # lightning
# mlflow-skinny
# omegaconf # omegaconf
# optuna # optuna
# peft # peft
...@@ -934,6 +945,7 @@ pyyaml==6.0.2 ...@@ -934,6 +945,7 @@ pyyaml==6.0.2
# timm # timm
# transformers # transformers
# vocos # vocos
# wandb
rapidfuzz==3.12.1 rapidfuzz==3.12.1
# via jiwer # via jiwer
rasterio==1.4.3 rasterio==1.4.3
...@@ -951,6 +963,7 @@ referencing==0.35.1 ...@@ -951,6 +963,7 @@ referencing==0.35.1
# jsonschema-specifications # jsonschema-specifications
regex==2024.9.11 regex==2024.9.11
# via # via
# diffusers
# nltk # nltk
# open-clip-torch # open-clip-torch
# sacrebleu # sacrebleu
...@@ -958,9 +971,10 @@ regex==2024.9.11 ...@@ -958,9 +971,10 @@ regex==2024.9.11
# transformers # transformers
requests==2.32.3 requests==2.32.3
# via # via
# azure-core
# buildkite-test-collector # buildkite-test-collector
# databricks-sdk
# datasets # datasets
# diffusers
# docker # docker
# evaluate # evaluate
# google-api-core # google-api-core
...@@ -970,15 +984,17 @@ requests==2.32.3 ...@@ -970,15 +984,17 @@ requests==2.32.3
# lightly # lightly
# lm-eval # lm-eval
# mistral-common # mistral-common
# mlflow-skinny # msal
# mteb # mteb
# pooch # pooch
# ray # ray
# responses # responses
# schemathesis # schemathesis
# starlette-testclient # starlette-testclient
# tacoreader
# tiktoken # tiktoken
# transformers # transformers
# wandb
responses==0.25.3 responses==0.25.3
# via genai-perf # via genai-perf
rfc3339-validator==0.1.4 rfc3339-validator==0.1.4
...@@ -991,6 +1007,7 @@ rich==13.9.4 ...@@ -991,6 +1007,7 @@ rich==13.9.4
# lightning # lightning
# mteb # mteb
# perceptron # perceptron
# terratorch
# typer # typer
rioxarray==0.19.0 rioxarray==0.19.0
# via terratorch # via terratorch
...@@ -1004,11 +1021,13 @@ rsa==4.9.1 ...@@ -1004,11 +1021,13 @@ rsa==4.9.1
# via google-auth # via google-auth
rtree==1.4.0 rtree==1.4.0
# via torchgeo # via torchgeo
runai-model-streamer==0.15.3 runai-model-streamer==0.15.7
# via -r requirements/test.in # via -r requirements/test.in
runai-model-streamer-gcs==0.15.3 runai-model-streamer-azure==0.15.7
# via runai-model-streamer
runai-model-streamer-gcs==0.15.7
# via runai-model-streamer # via runai-model-streamer
runai-model-streamer-s3==0.15.3 runai-model-streamer-s3==0.15.7
# via runai-model-streamer # via runai-model-streamer
s3transfer==0.10.3 s3transfer==0.10.3
# via boto3 # via boto3
...@@ -1017,47 +1036,54 @@ sacrebleu==2.4.3 ...@@ -1017,47 +1036,54 @@ sacrebleu==2.4.3
safetensors==0.4.5 safetensors==0.4.5
# via # via
# accelerate # accelerate
# diffusers
# open-clip-torch # open-clip-torch
# peft # peft
# segmentation-models-pytorch
# timm # timm
# transformers # transformers
schemathesis==3.39.15 schemathesis==3.39.15
# via -r requirements/test.in # via -r requirements/test.in
scikit-image==0.25.2 scikit-image==0.25.2
# via albumentations # via
# albumentations
# terratorch
scikit-learn==1.5.2 scikit-learn==1.5.2
# via # via
# albumentations # albumentations
# librosa # librosa
# lm-eval # lm-eval
# mlflow
# mteb # mteb
# sentence-transformers # sentence-transformers
# terratorch
scipy==1.13.1 scipy==1.13.1
# via # via
# albumentations # albumentations
# bm25s # bm25s
# imagehash
# librosa # librosa
# mlflow
# mteb # mteb
# scikit-image # scikit-image
# scikit-learn # scikit-learn
# sentence-transformers # sentence-transformers
# statsmodels # statsmodels
# vocos # vocos
segmentation-models-pytorch==0.4.0 segmentation-models-pytorch==0.5.0
# via # via
# -r requirements/test.in
# terratorch # terratorch
# torchgeo # torchgeo
sentence-transformers==5.2.0 sentence-transformers==5.2.0
# via # via
# -r requirements/test.in # -r requirements/test.in
# mteb # mteb
sentry-sdk==2.52.0
# via wandb
setuptools==77.0.3 setuptools==77.0.3
# via # via
# grpcio-tools
# lightning-utilities # lightning-utilities
# pytablewriter # pytablewriter
# tensorboard
# torch # torch
shapely==2.1.1 shapely==2.1.1
# via # via
...@@ -1075,7 +1101,6 @@ six==1.16.0 ...@@ -1075,7 +1101,6 @@ six==1.16.0
# python-dateutil # python-dateutil
# rfc3339-validator # rfc3339-validator
# rouge-score # rouge-score
# segmentation-models-pytorch
smart-open==7.1.0 smart-open==7.1.0
# via ray # via ray
smmap==5.0.2 smmap==5.0.2
...@@ -1099,12 +1124,9 @@ soxr==0.5.0.post1 ...@@ -1099,12 +1124,9 @@ soxr==0.5.0.post1
sqlalchemy==2.0.41 sqlalchemy==2.0.41
# via # via
# alembic # alembic
# mlflow
# optuna # optuna
sqlitedict==2.1.0 sqlitedict==2.1.0
# via lm-eval # via lm-eval
sqlparse==0.5.3
# via mlflow-skinny
starlette==0.50.0 starlette==0.50.0
# via # via
# fastapi # fastapi
...@@ -1124,6 +1146,8 @@ tabledata==1.3.3 ...@@ -1124,6 +1146,8 @@ tabledata==1.3.3
# via pytablewriter # via pytablewriter
tabulate==0.9.0 tabulate==0.9.0
# via sacrebleu # via sacrebleu
tacoreader==0.5.6
# via terratorch
tblib==3.1.0 tblib==3.1.0
# via -r requirements/test.in # via -r requirements/test.in
tcolorpy==0.1.6 tcolorpy==0.1.6
...@@ -1133,13 +1157,19 @@ tenacity==9.1.2 ...@@ -1133,13 +1157,19 @@ tenacity==9.1.2
# gpt-oss # gpt-oss
# lm-eval # lm-eval
# plotly # plotly
tensorboard==2.20.0
# via terratorch
tensorboard-data-server==0.7.2
# via tensorboard
tensorboardx==2.6.4 tensorboardx==2.6.4
# via lightning # via lightning
tensorizer==2.10.1 tensorizer==2.10.1
# via -r requirements/test.in # via -r requirements/test.in
termcolor==3.1.0 termcolor==3.1.0
# via gpt-oss # via
terratorch @ git+https://github.com/IBM/terratorch.git@07184fcf91a1324f831ff521dd238d97fe350e3e # gpt-oss
# terratorch
terratorch==1.2.2
# via -r requirements/test.in # via -r requirements/test.in
threadpoolctl==3.5.0 threadpoolctl==3.5.0
# via scikit-learn # via scikit-learn
...@@ -1172,16 +1202,14 @@ torch==2.10.0+cu129 ...@@ -1172,16 +1202,14 @@ torch==2.10.0+cu129
# -r requirements/test.in # -r requirements/test.in
# accelerate # accelerate
# bitsandbytes # bitsandbytes
# efficientnet-pytorch
# encodec # encodec
# instanttensor
# kornia # kornia
# lightly # lightly
# lightning # lightning
# lm-eval
# mteb # mteb
# open-clip-torch # open-clip-torch
# peft # peft
# pretrainedmodels
# pytorch-lightning # pytorch-lightning
# runai-model-streamer # runai-model-streamer
# segmentation-models-pytorch # segmentation-models-pytorch
...@@ -1213,12 +1241,11 @@ torchvision==0.25.0+cu129 ...@@ -1213,12 +1241,11 @@ torchvision==0.25.0+cu129
# -r requirements/test.in # -r requirements/test.in
# lightly # lightly
# open-clip-torch # open-clip-torch
# pretrainedmodels
# segmentation-models-pytorch # segmentation-models-pytorch
# terratorch # terratorch
# timm # timm
# torchgeo # torchgeo
tqdm==4.66.6 tqdm==4.67.3
# via # via
# datasets # datasets
# evaluate # evaluate
...@@ -1232,19 +1259,16 @@ tqdm==4.66.6 ...@@ -1232,19 +1259,16 @@ tqdm==4.66.6
# optuna # optuna
# peft # peft
# pqdm # pqdm
# pretrainedmodels
# pytorch-lightning # pytorch-lightning
# segmentation-models-pytorch # segmentation-models-pytorch
# sentence-transformers # sentence-transformers
# tqdm-multiprocess # tacoreader
# terratorch
# transformers # transformers
tqdm-multiprocess==0.0.11
# via lm-eval
transformers==4.57.5 transformers==4.57.5
# via # via
# -r requirements/test.in # -r requirements/test.in
# genai-perf # genai-perf
# lm-eval
# peft # peft
# sentence-transformers # sentence-transformers
# transformers-stream-generator # transformers-stream-generator
...@@ -1272,16 +1296,18 @@ typing-extensions==4.15.0 ...@@ -1272,16 +1296,18 @@ typing-extensions==4.15.0
# aiosignal # aiosignal
# albumentations # albumentations
# alembic # alembic
# azure-core
# azure-identity
# azure-storage-blob
# chz # chz
# fastapi # fastapi
# graphene
# grpcio # grpcio
# huggingface-hub # huggingface-hub
# librosa # librosa
# lightning # lightning
# lightning-utilities # lightning-utilities
# lm-eval
# mistral-common # mistral-common
# mlflow-skinny
# mteb # mteb
# opentelemetry-api # opentelemetry-api
# opentelemetry-sdk # opentelemetry-sdk
...@@ -1299,6 +1325,7 @@ typing-extensions==4.15.0 ...@@ -1299,6 +1325,7 @@ typing-extensions==4.15.0
# typer # typer
# typeshed-client # typeshed-client
# typing-inspection # typing-inspection
# wandb
typing-inspection==0.4.2 typing-inspection==0.4.2
# via pydantic # via pydantic
tzdata==2024.2 tzdata==2024.2
...@@ -1313,25 +1340,26 @@ urllib3==2.2.3 ...@@ -1313,25 +1340,26 @@ urllib3==2.2.3
# lightly # lightly
# requests # requests
# responses # responses
# sentry-sdk
# tritonclient # tritonclient
uvicorn==0.35.0 uvicorn==0.35.0
# via # via gpt-oss
# gpt-oss
# mlflow-skinny
vector-quantize-pytorch==1.21.2 vector-quantize-pytorch==1.21.2
# via -r requirements/test.in # via -r requirements/test.in
virtualenv==20.31.2 virtualenv==20.31.2
# via ray # via ray
vocos==0.1.0 vocos==0.1.0
# via -r requirements/test.in # via -r requirements/test.in
wandb==0.24.2
# via terratorch
wcwidth==0.2.13 wcwidth==0.2.13
# via ftfy # via ftfy
webcolors==24.11.1 webcolors==24.11.1
# via jsonschema # via jsonschema
werkzeug==3.1.3 werkzeug==3.1.3
# via # via
# flask
# schemathesis # schemathesis
# tensorboard
word2number==1.1 word2number==1.1
# via lm-eval # via lm-eval
wrapt==1.17.2 wrapt==1.17.2
......
...@@ -15,4 +15,4 @@ torch==2.10.0+xpu ...@@ -15,4 +15,4 @@ torch==2.10.0+xpu
torchaudio torchaudio
torchvision torchvision
vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.2/vllm_xpu_kernels-0.1.2-cp312-cp312-linux_x86_64.whl vllm_xpu_kernels @ https://github.com/vllm-project/vllm-xpu-kernels/releases/download/v0.1.3/vllm_xpu_kernels-0.1.3-cp38-abi3-linux_x86_64.whl
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Autotune registered Helion kernels for optimal configurations.
Usage:
# Autotune all registered kernels
python scripts/autotune_helion_kernels.py
# Autotune specific kernel
python scripts/autotune_helion_kernels.py --kernels silu_mul_fp8
# Autotune multiple kernels
python scripts/autotune_helion_kernels.py --kernels silu_mul_fp8 rms_norm_fp8
# Force re-autotuning
python scripts/autotune_helion_kernels.py --force
# List available kernels
python scripts/autotune_helion_kernels.py --list
"""
import argparse
import sys
import time
from dataclasses import dataclass
import torch
from torch._subclasses.fake_tensor import FakeTensorMode
try:
import helion
from vllm.kernels.helion import (
ConfigManager,
get_kernel_by_name,
get_registered_kernels,
)
from vllm.kernels.helion.utils import get_canonical_gpu_name
from vllm.logger import init_logger
from vllm.utils.import_utils import has_helion
except ImportError as e:
print(f"Error importing vLLM: {e}")
print("Please ensure vLLM is installed and in your Python path")
sys.exit(1)
logger = init_logger("vllm.scripts.autotune_helion_kernels")
@dataclass
class AutotuneResult:
    """Summary of one kernel's autotuning run.

    Instances are returned by ``autotune_kernel``. On the "error" and
    "skipped" paths the counters are zero and ``configs`` is empty, with
    ``message`` carrying the human-readable reason.
    """

    # Overall outcome of the run.
    status: str  # "success" | "partial" | "error" | "skipped"
    # Counters for the run; presumably the number of config keys that were
    # autotuned successfully / unsuccessfully -- confirm against the caller.
    successful: int
    failed: int
    # String-keyed helion.Config objects; presumably keyed by config key.
    # The annotation is a string so the class can be defined without
    # evaluating the ``helion`` name.
    configs: dict[str, "helion.Config"]
    # Optional explanation, e.g. the error text or the reason for skipping.
    message: str = ""
def list_kernels() -> None:
    """Print the names of all registered Helion kernels to stdout.

    Names are printed in sorted order under a header, followed by the total
    count. When the registry is empty a single notice is printed instead.
    """
    kernels = get_registered_kernels()
    if not kernels:
        print("No Helion kernels found in registry.")
        return

    print("Available Helion kernels:")
    print("=" * 50)
    # Iterating the mapping yields its keys, so no explicit .keys() is needed.
    for name in sorted(kernels):
        print(f" {name}")
    print(f"\nTotal: {len(kernels)} kernels")
def check_requirements() -> bool:
    """Check the prerequisites for running Helion autotuning.

    Returns:
        True when CUDA is available and ``has_helion()`` reports that Helion
        is installed. Otherwise the first unmet requirement is logged as an
        error and False is returned.
    """
    # CUDA is checked first, so a host without CUDA never reaches the
    # Helion check.
    if not torch.cuda.is_available():
        logger.error("CUDA is not available. Helion autotuning requires GPU.")
        return False

    if not has_helion():
        logger.error("Helion is not installed. Please install Helion package.")
        return False

    return True
def autotune_kernel(
    kernel_name: str,
    platform: str,
    config_manager: ConfigManager,
    force: bool = False,
    autotune_effort: str = "quick",
) -> AutotuneResult:
    """Autotune every missing (or, with ``force``, every) config of one kernel.

    Args:
        kernel_name: Name used to look the kernel up in the registry.
        platform: Platform name under which configs are read and saved.
        config_manager: Used to read existing configs and persist new ones.
        force: When True, re-autotune config keys that already have a config.
        autotune_effort: Effort level forwarded to the kernel's autotuner.

    Returns:
        An ``AutotuneResult`` whose status is "error" (kernel missing, no input
        generator, or an unexpected failure), "skipped" (nothing to do),
        "success" (no config failed) or "partial" (at least one config failed).
    """

    def _empty_result(status: str, message: str) -> AutotuneResult:
        # Result shape shared by every outcome that produced no configs.
        return AutotuneResult(
            status=status,
            message=message,
            successful=0,
            failed=0,
            configs={},
        )

    logger.debug(
        "Starting autotune for kernel '%s' with effort='%s'",
        kernel_name,
        autotune_effort,
    )

    wrapper = get_kernel_by_name(kernel_name)
    if wrapper is None:
        reason = f"Kernel '{kernel_name}' not found in registry"
        logger.error(reason)
        return _empty_result("error", reason)

    # Only the config keys are needed at this point, so the inputs are
    # generated under FakeTensorMode; real inputs are generated further down.
    try:
        with FakeTensorMode():
            known_keys = list(wrapper.get_inputs().keys())
    except NotImplementedError:
        reason = f"Kernel '{kernel_name}' has no input generator registered"
        logger.error(reason)
        return _empty_result("error", reason)

    try:
        logger.info(
            "Autotuning kernel '%s' for platform '%s' with %d configs",
            kernel_name,
            platform,
            len(known_keys),
        )

        if force:
            logger.debug("Force mode enabled, will re-autotune all configs")
            pending_keys = known_keys
        else:
            saved_configs = config_manager.get_platform_configs(kernel_name, platform)
            pending_keys = []
            for key in known_keys:
                if key not in saved_configs:
                    pending_keys.append(key)
                    continue
                logger.debug(
                    "Config '%s' already exists for platform '%s', skipping",
                    key,
                    platform,
                )

        if not pending_keys:
            logger.info(
                "All configs already exist for kernel '%s' on platform '%s'. "
                "Use --force to re-autotune.",
                kernel_name,
                platform,
            )
            return _empty_result("skipped", "All configs already exist")

        real_inputs = wrapper.get_inputs()
        pending_inputs = {key: real_inputs[key] for key in pending_keys}

        run_started = time.time()
        tuned = {}
        failures = []

        for key, inputs in pending_inputs.items():
            logger.info("Autotuning config: %s", key)
            logger.debug(
                "Input shapes: %s",
                [getattr(inp, "shape", type(inp).__name__) for inp in inputs],
            )
            try:
                key_started = time.time()
                tuned_config = wrapper.run_autotune(inputs, autotune_effort)
                key_elapsed = time.time() - key_started
                # Save immediately for checkpointing
                config_manager.save_configs(kernel_name, platform, {key: tuned_config})
                tuned[key] = tuned_config
                logger.debug("Config details: %s", tuned_config)
                logger.info(
                    "✓ Autotuned and saved config '%s' (%.2fs)",
                    key,
                    key_elapsed,
                )
            except (RuntimeError, ValueError, OSError) as e:
                # One failing config must not abort the remaining ones.
                logger.exception(
                    "Failed to autotune config '%s': %s",
                    key,
                    e,
                )
                failures.append(key)

        run_elapsed = time.time() - run_started
        successful = len(tuned)
        failed = len(failures)
        logger.info(
            "Completed autotuning for kernel '%s': %d successful, %d failed (%.2fs)",
            kernel_name,
            successful,
            failed,
            run_elapsed,
        )
        return AutotuneResult(
            status="partial" if failed else "success",
            successful=successful,
            failed=failed,
            configs=tuned,
        )
    except (KeyError, RuntimeError, ValueError, OSError) as e:
        reason = f"Unexpected error: {e}"
        logger.exception("Failed to autotune kernel '%s': %s", kernel_name, e)
        return _empty_result("error", reason)
def summarize_results(results: dict[str, AutotuneResult]) -> bool:
logger.info("=" * 50)
logger.info("Autotuning Results Summary")
logger.info("=" * 50)
total_successful = 0
total_failed = 0
success_kernels = []
partial_kernels = []
error_kernels = []
skipped_kernels = []
for kernel_name, result in results.items():
total_successful += result.successful
total_failed += result.failed
if result.status == "success":
success_kernels.append(f"{kernel_name} ({result.successful} configs)")
logger.info("✓ %s: %d configs successful", kernel_name, result.successful)
elif result.status == "partial":
partial_kernels.append(
f"{kernel_name} ({result.successful} ok, {result.failed} failed)"
)
logger.warning(
"⚠ %s: %d successful, %d failed",
kernel_name,
result.successful,
result.failed,
)
elif result.status == "error":
error_kernels.append(f"{kernel_name}: {result.message or 'Unknown error'}")
logger.error("✗ %s: %s", kernel_name, result.message or "Unknown error")
elif result.status == "skipped":
skipped_kernels.append(f"{kernel_name}: {result.message or 'Skipped'}")
logger.info("- %s: %s", kernel_name, result.message or "Skipped")
logger.info("=" * 50)
logger.info(
"Summary: %d total configs (%d successful, %d failed)",
total_successful + total_failed,
total_successful,
total_failed,
)
logger.info(
"Kernels: %d success, %d partial, %d error, %d skipped",
len(success_kernels),
len(partial_kernels),
len(error_kernels),
len(skipped_kernels),
)
has_failures = bool(error_kernels or partial_kernels)
if not has_failures:
if total_successful > 0:
logger.info("All configs autotuned successfully!")
else:
logger.info("No new configs were generated (all may already exist)")
return not has_failures
def get_kernels_to_autotune(requested_kernels: list[str] | None) -> list[str]:
    """Resolve the ``--kernels`` selection against the kernel registry.

    Args:
        requested_kernels: Kernel names given on the command line, or
            None/empty to select every registered kernel.

    Returns:
        The kernel names to autotune: all registered kernels when nothing was
        requested, otherwise the requested names in the order given.

    Exits the process with status 1 (after logging an error) when the
    registry is empty, when a name is requested more than once, or when a
    requested name is not registered.
    """
    all_kernels = get_registered_kernels()
    if not all_kernels:
        logger.error("No Helion kernels found in registry")
        sys.exit(1)

    if not requested_kernels:
        return list(all_kernels.keys())

    # Single pass that records each repeated name once, in first-repeat
    # order, so the error message is deterministic across runs.
    seen: set[str] = set()
    duplicates: list[str] = []
    for kernel_name in requested_kernels:
        if kernel_name not in seen:
            seen.add(kernel_name)
        elif kernel_name not in duplicates:
            duplicates.append(kernel_name)
    if duplicates:
        logger.error("Duplicate kernel names in --kernels flag: %s", duplicates)
        sys.exit(1)

    missing_kernels = [
        kernel_name
        for kernel_name in requested_kernels
        if kernel_name not in all_kernels
    ]
    if missing_kernels:
        logger.error("Kernel(s) not found: %s", missing_kernels)
        logger.error("Available kernels: %s", list(all_kernels.keys()))
        sys.exit(1)

    # Every requested name is registered at this point.
    return list(requested_kernels)
def main():
    """Command-line entry point: parse arguments and autotune Helion kernels.

    Exits with status 0 when every selected kernel finished without failures
    and 1 otherwise. ``--list`` prints the registered kernels and returns
    without autotuning.
    """
    # The module docstring is None when docstrings are stripped (python -OO);
    # fall back to an empty epilog instead of raising TypeError.
    _, _, usage_epilog = (__doc__ or "").partition("Usage:")

    parser = argparse.ArgumentParser(
        description="Autotune Helion kernels",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_epilog,
    )
    parser.add_argument(
        "--kernels",
        nargs="+",
        help="Kernel(s) to autotune (default: all kernels)",
    )
    parser.add_argument(
        "--config-dir",
        type=str,
        help="Config directory for config files (default: vLLM helion configs dir)",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List available Helion kernels and exit",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=(
            "Force re-autotuning even if configs already exist for the "
            "platform and config keys"
        ),
    )
    parser.add_argument(
        "--autotune-effort",
        type=str,
        default="quick",
        help=(
            "Helion autotune effort level: 'quick' (smaller search) or "
            "'full' (full search budget) (default: quick)"
        ),
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging",
    )
    args = parser.parse_args()

    import logging

    if args.verbose:
        logging.getLogger("vllm").setLevel(logging.DEBUG)
        logger.debug("Verbose mode enabled")
        logger.debug("Arguments: %s", vars(args))
    else:
        logging.getLogger("vllm").setLevel(logging.INFO)

    # Listing happens before the requirements check.
    if args.list:
        list_kernels()
        return

    if not check_requirements():
        sys.exit(1)

    platform = get_canonical_gpu_name()
    logger.info("Detected GPU platform: %s", platform)

    config_manager = (
        ConfigManager(args.config_dir) if args.config_dir else ConfigManager()
    )
    # Fail before any autotuning starts if results could not be saved.
    try:
        config_manager.ensure_base_dir_writable()
    except OSError as e:
        logger.error("Failed to access config directory: %s", e)
        sys.exit(1)

    kernels_to_autotune = get_kernels_to_autotune(args.kernels)
    logger.info(
        "Will autotune %d kernel(s) for platform '%s': %s",
        len(kernels_to_autotune),
        platform,
        kernels_to_autotune,
    )

    results = {}
    for kernel_name in kernels_to_autotune:
        results[kernel_name] = autotune_kernel(
            kernel_name, platform, config_manager, args.force, args.autotune_effort
        )

    success = summarize_results(results)
    sys.exit(0 if success else 1)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
...@@ -18,8 +18,6 @@ import torch ...@@ -18,8 +18,6 @@ import torch
from packaging.version import Version, parse from packaging.version import Version, parse
from setuptools import Extension, setup from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from setuptools.command.build_py import build_py
from setuptools.command.develop import develop
from setuptools_scm import get_version from setuptools_scm import get_version
from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME
...@@ -81,81 +79,6 @@ def is_freethreaded(): ...@@ -81,81 +79,6 @@ def is_freethreaded():
return bool(sysconfig.get_config_var("Py_GIL_DISABLED")) return bool(sysconfig.get_config_var("Py_GIL_DISABLED"))
def compile_grpc_protos():
    """Generate the gRPC Python stubs from ``vllm_engine.proto``.

    Runs ``grpc_tools.protoc`` to emit ``*_pb2.py``, ``*_pb2_grpc.py`` and
    ``*_pb2.pyi`` files, then prepends an SPDX / mypy header to each generated
    file that does not already start with one.

    Returns:
        True on success. False when grpcio-tools is not installed, the proto
        file does not exist, or protoc returns a non-zero exit code.
    """
    try:
        from grpc_tools import protoc
    except ImportError:
        logger.warning(
            "grpcio-tools not installed, skipping gRPC proto compilation. "
            "gRPC server functionality will not be available."
        )
        return False

    grpc_dir = ROOT_DIR / "vllm" / "grpc"
    proto_file = grpc_dir / "vllm_engine.proto"
    if not proto_file.exists():
        logger.warning("Proto file not found at %s, skipping compilation", proto_file)
        return False

    logger.info("Compiling gRPC protobuf: %s", proto_file)
    protoc_args = [
        "grpc_tools.protoc",
        f"--proto_path={ROOT_DIR}",
        f"--python_out={ROOT_DIR}",
        f"--grpc_python_out={ROOT_DIR}",
        f"--pyi_out={ROOT_DIR}",
        str(proto_file),
    ]
    exit_code = protoc.main(protoc_args)
    if exit_code != 0:
        logger.error("protoc failed with exit code %s", exit_code)
        return False

    # Add SPDX headers and mypy ignore to generated files
    spdx_header = (
        "# SPDX-License-Identifier: Apache-2.0\n"
        "# SPDX-FileCopyrightText: Copyright contributors to the vLLM project\n"
        "# mypy: ignore-errors\n"
    )
    generated_names = (
        "vllm_engine_pb2.py",
        "vllm_engine_pb2_grpc.py",
        "vllm_engine_pb2.pyi",
    )
    for file_name in generated_names:
        generated_path = grpc_dir / file_name
        if not generated_path.exists():
            continue
        body = generated_path.read_text()
        # Leave files alone that already carry the header.
        if body.startswith("# SPDX-License-Identifier"):
            continue
        generated_path.write_text(spdx_header + body)

    logger.info("gRPC protobuf compilation successful")
    return True
class BuildPyAndGenerateGrpc(build_py):
    """``build_py`` command that first generates gRPC stubs from proto files."""

    def run(self):
        """Generate the gRPC stubs, then run the standard ``build_py`` step."""
        # The return value is ignored, so a failed or skipped stub generation
        # does not abort the build.
        compile_grpc_protos()
        super().run()
class DevelopAndGenerateGrpc(develop):
    """``develop`` command that first generates gRPC stubs from proto files."""

    def run(self):
        """Generate the gRPC stubs, then run the standard ``develop`` step."""
        # The return value is ignored, so a failed or skipped stub generation
        # does not abort the install.
        compile_grpc_protos()
        super().run()
class CMakeExtension(Extension): class CMakeExtension(Extension):
def __init__(self, name: str, cmake_lists_dir: str = ".", **kwa) -> None: def __init__(self, name: str, cmake_lists_dir: str = ".", **kwa) -> None:
super().__init__(name, sources=[], py_limited_api=not is_freethreaded(), **kwa) super().__init__(name, sources=[], py_limited_api=not is_freethreaded(), **kwa)
...@@ -734,13 +657,18 @@ class precompiled_wheel_utils: ...@@ -734,13 +657,18 @@ class precompiled_wheel_utils:
def get_base_commit_in_main_branch() -> str: def get_base_commit_in_main_branch() -> str:
try: try:
# Get the latest commit hash of the upstream main branch. # Get the latest commit hash of the upstream main branch.
resp_json = subprocess.check_output( curl_cmd = [
[ "curl",
"curl", "-s",
"-s", "https://api.github.com/repos/vllm-project/vllm/commits/main",
"https://api.github.com/repos/vllm-project/vllm/commits/main", ]
github_token = os.getenv("GH_TOKEN", os.getenv("GITHUB_TOKEN"))
if github_token:
curl_cmd += [
"-H",
f"Authorization: token {github_token}",
] ]
).decode("utf-8") resp_json = subprocess.check_output(curl_cmd).decode("utf-8")
upstream_main_commit = json.loads(resp_json)["sha"] upstream_main_commit = json.loads(resp_json)["sha"]
print(f"Upstream main branch latest commit: {upstream_main_commit}") print(f"Upstream main branch latest commit: {upstream_main_commit}")
...@@ -818,7 +746,7 @@ def _is_xpu() -> bool: ...@@ -818,7 +746,7 @@ def _is_xpu() -> bool:
def _build_custom_ops() -> bool: def _build_custom_ops() -> bool:
return _is_cuda() or _is_hip() or _is_cpu() return _is_cuda() or _is_hip()
def get_rocm_version(): def get_rocm_version():
...@@ -976,6 +904,11 @@ if _is_cuda(): ...@@ -976,6 +904,11 @@ if _is_cuda():
): ):
# FA3 requires CUDA 12.3 or later # FA3 requires CUDA 12.3 or later
ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C")) ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
# FA4 CuteDSL - Python-only component for FA4's cute DSL support
# Optional since this doesn't produce a .so file, just copies Python files
ext_modules.append(
CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa4_cutedsl_C", optional=True)
)
if envs.VLLM_USE_PRECOMPILED or ( if envs.VLLM_USE_PRECOMPILED or (
CUDA_HOME and get_nvcc_cuda_version() >= Version("12.9") CUDA_HOME and get_nvcc_cuda_version() >= Version("12.9")
): ):
...@@ -987,6 +920,16 @@ if _is_cuda(): ...@@ -987,6 +920,16 @@ if _is_cuda():
CMakeExtension(name="vllm._flashmla_extension_C", optional=True) CMakeExtension(name="vllm._flashmla_extension_C", optional=True)
) )
if _is_cpu():
import platform
if platform.machine() in ("x86_64", "AMD64"):
ext_modules.append(CMakeExtension(name="vllm._C"))
ext_modules.append(CMakeExtension(name="vllm._C_AVX512"))
ext_modules.append(CMakeExtension(name="vllm._C_AVX2"))
else:
ext_modules.append(CMakeExtension(name="vllm._C"))
if _build_custom_ops(): if _build_custom_ops():
ext_modules.append(CMakeExtension(name="vllm._C")) ext_modules.append(CMakeExtension(name="vllm._C"))
...@@ -1014,17 +957,12 @@ if _no_device(): ...@@ -1014,17 +957,12 @@ if _no_device():
ext_modules = [] ext_modules = []
if not ext_modules: if not ext_modules:
cmdclass = { cmdclass = {}
"build_py": BuildPyAndGenerateGrpc,
"develop": DevelopAndGenerateGrpc,
}
else: else:
cmdclass = { cmdclass = {
"build_ext": precompiled_build_ext "build_ext": precompiled_build_ext
if envs.VLLM_USE_PRECOMPILED if envs.VLLM_USE_PRECOMPILED
else cmake_build_ext, else cmake_build_ext,
"build_py": BuildPyAndGenerateGrpc,
"develop": DevelopAndGenerateGrpc,
} }
setup( setup(
...@@ -1033,22 +971,28 @@ setup( ...@@ -1033,22 +971,28 @@ setup(
ext_modules=ext_modules, ext_modules=ext_modules,
install_requires=get_requirements(), install_requires=get_requirements(),
extras_require={ extras_require={
"bench": ["pandas", "matplotlib", "seaborn", "datasets", "scipy"], # AMD Zen CPU optimizations via zentorch
"zen": ["zentorch"],
"bench": ["pandas", "matplotlib", "seaborn", "datasets", "scipy", "plotly"],
"tensorizer": ["tensorizer==2.10.1"], "tensorizer": ["tensorizer==2.10.1"],
"fastsafetensors": ["fastsafetensors >= 0.2.2"], "fastsafetensors": ["fastsafetensors >= 0.2.2"],
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"], "instanttensor": ["instanttensor >= 0.1.5"],
"runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
"audio": [ "audio": [
"librosa", "librosa",
"scipy", "scipy",
"soundfile", "soundfile",
"mistral_common[audio]", "mistral_common[audio]",
"av",
], # Required for audio processing ], # Required for audio processing
"video": [], # Kept for backwards compatibility "video": [], # Kept for backwards compatibility
"flashinfer": [], # Kept for backwards compatibility "flashinfer": [], # Kept for backwards compatibility
# Optional deps for AMD FP4 quantization support # Optional deps for AMD FP4 quantization support
"petit-kernel": ["petit-kernel"], "petit-kernel": ["petit-kernel"],
# Optional deps for Helion kernel development # Optional deps for Helion kernel development
"helion": ["helion"], "helion": ["helion==0.3.2"],
# Optional deps for gRPC server (vllm serve --grpc)
"grpc": ["smg-grpc-servicer[vllm] >= 0.5.0"],
# Optional deps for OpenTelemetry tracing # Optional deps for OpenTelemetry tracing
"otel": [ "otel": [
"opentelemetry-sdk>=1.26.0", "opentelemetry-sdk>=1.26.0",
......
...@@ -11,6 +11,8 @@ from unittest.mock import Mock ...@@ -11,6 +11,8 @@ from unittest.mock import Mock
import pytest import pytest
import torch import torch
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
from vllm import LLM from vllm import LLM
from vllm.platforms import current_platform from vllm.platforms import current_platform
...@@ -91,6 +93,15 @@ def test_models( ...@@ -91,6 +93,15 @@ def test_models(
if enable_prompt_embeds: if enable_prompt_embeds:
with torch.no_grad(): with torch.no_grad():
prompt_embeds = hf_model.get_prompt_embeddings(example_prompts) prompt_embeds = hf_model.get_prompt_embeddings(example_prompts)
if model == "hmellor/tiny-random-Gemma2ForCausalLM" and (
Version(TRANSFORMERS_VERSION) < Version("5.3.0.dev0")
):
# For Gemma 1/2 models with Transformers 5.3.0+, the prompt embeddings
# are normalised in `get_prompt_embeddings`, like Gemma 3.
# For older versions, we need to manually normalise.
embed_scale = hf_model.config.hidden_size**0.5
normalizer = torch.tensor(embed_scale, dtype=prompt_embeds[0].dtype)
prompt_embeds = [p_e * normalizer for p_e in prompt_embeds]
with VllmRunner( with VllmRunner(
model, model,
...@@ -124,8 +135,6 @@ def test_models( ...@@ -124,8 +135,6 @@ def test_models(
[ [
("facebook/opt-125m", "ray", "", "L4", {}), ("facebook/opt-125m", "ray", "", "L4", {}),
("facebook/opt-125m", "mp", "", "L4", {}), ("facebook/opt-125m", "mp", "", "L4", {}),
("facebook/opt-125m", "ray", "", "L4", {"VLLM_SLEEP_WHEN_IDLE": "1"}),
("facebook/opt-125m", "mp", "", "L4", {"VLLM_SLEEP_WHEN_IDLE": "1"}),
("meta-llama/Llama-3.2-1B-Instruct", "ray", "", "L4", {}), ("meta-llama/Llama-3.2-1B-Instruct", "ray", "", "L4", {}),
("meta-llama/Llama-3.2-1B-Instruct", "mp", "", "L4", {}), ("meta-llama/Llama-3.2-1B-Instruct", "mp", "", "L4", {}),
("facebook/opt-125m", "ray", "", "A100", {}), ("facebook/opt-125m", "ray", "", "A100", {}),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment