Commit a99300bd authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.10.2rc1' into v0.10.2rc1-dev

parents cc3e01c7 5438967f
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
import os
import torch
from vllm import LLM
from vllm.pooling_params import PoolingParams
# This example shows how to perform an offline inference that generates
# multimodal data. In this specific case this example will take a geotiff
# image as input, process it using the multimodal data processor, and
# perform inference.
# Reuirement - install plugin at:
# https://github.com/christian-pinto/prithvi_io_processor_plugin
def main():
torch.set_default_dtype(torch.float16)
image_url = "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif" # noqa: E501
img_prompt = dict(
data=image_url,
data_format="url",
image_format="tiff",
out_data_format="b64_json",
)
llm = LLM(
model="christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
skip_tokenizer_init=True,
trust_remote_code=True,
enforce_eager=True,
# Limit the maximum number of parallel requests
# to avoid the model going OOM.
# The maximum number depends on the available GPU memory
max_num_seqs=32,
io_processor_plugin="prithvi_to_tiff_india",
)
pooling_params = PoolingParams(task="encode", softmax=False)
pooler_output = llm.encode(
img_prompt,
pooling_params=pooling_params,
)
output = pooler_output[0].outputs
print(output)
decoded_data = base64.b64decode(output.data)
file_path = os.path.join(os.getcwd(), "offline_prediction.tiff")
with open(file_path, "wb") as f:
f.write(decoded_data)
print(f"Output file path: {file_path}")
if __name__ == "__main__":
main()
...@@ -5,6 +5,7 @@ from transformers import AutoTokenizer ...@@ -5,6 +5,7 @@ from transformers import AutoTokenizer
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.benchmarks.datasets import add_dataset_parser, get_samples from vllm.benchmarks.datasets import add_dataset_parser, get_samples
from vllm.inputs import TokensPrompt
from vllm.v1.metrics.reader import Counter, Vector from vllm.v1.metrics.reader import Counter, Vector
try: try:
...@@ -137,7 +138,8 @@ def main(): ...@@ -137,7 +138,8 @@ def main():
sampling_params = SamplingParams(temperature=args.temp, max_tokens=args.output_len) sampling_params = SamplingParams(temperature=args.temp, max_tokens=args.output_len)
if not args.custom_mm_prompts: if not args.custom_mm_prompts:
outputs = llm.generate( outputs = llm.generate(
prompt_token_ids=prompt_ids, sampling_params=sampling_params [TokensPrompt(prompt_token_ids=x) for x in prompt_ids],
sampling_params=sampling_params,
) )
else: else:
outputs = llm.chat(prompts, sampling_params=sampling_params) outputs = llm.chat(prompts, sampling_params=sampling_params)
......
...@@ -85,7 +85,7 @@ def format_output(title: str, output: str): ...@@ -85,7 +85,7 @@ def format_output(title: str, output: str):
def generate_output(prompt: str, sampling_params: SamplingParams, llm: LLM): def generate_output(prompt: str, sampling_params: SamplingParams, llm: LLM):
outputs = llm.generate(prompts=prompt, sampling_params=sampling_params) outputs = llm.generate(prompt, sampling_params=sampling_params)
return outputs[0].outputs[0].text return outputs[0].outputs[0].text
......
...@@ -173,6 +173,37 @@ def run_deepseek_vl2(questions: list[str], modality: str) -> ModelRequestData: ...@@ -173,6 +173,37 @@ def run_deepseek_vl2(questions: list[str], modality: str) -> ModelRequestData:
) )
# Ernie4.5-VL
def run_ernie45_vl(questions: list[str], modality: str) -> ModelRequestData:
model_name = "baidu/ERNIE-4.5-VL-28B-A3B-PT"
engine_args = EngineArgs(
model=model_name,
max_model_len=4096,
max_num_seqs=5,
limit_mm_per_prompt={modality: 1},
trust_remote_code=True,
)
if modality == "image":
placeholder = "Picture 1:<|IMAGE_START|><|image@placeholder|><|IMAGE_END|>"
elif modality == "video":
placeholder = "Video 1:<|VIDEO_START|><|video@placeholder|><|VIDEO_END|>"
prompts = [
(
f"<|begin_of_sentence|>User: {question}{placeholder}\n"
"Assistant: <think></think>"
)
for question in questions
]
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# Florence2 # Florence2
def run_florence2(questions: list[str], modality: str) -> ModelRequestData: def run_florence2(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -283,8 +314,10 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData: ...@@ -283,8 +314,10 @@ def run_glm4v(questions: list[str], modality: str) -> ModelRequestData:
) )
prompts = [ prompts = [
f"<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>\ (
{question}<|assistant|>" "<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>"
f"{question}<|assistant|>"
)
for question in questions for question in questions
] ]
...@@ -333,6 +366,80 @@ def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData: ...@@ -333,6 +366,80 @@ def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData:
) )
# GLM-4.5V
def run_glm4_5v(questions: list[str], modality: str) -> ModelRequestData:
model_name = "zai-org/GLM-4.5V"
engine_args = EngineArgs(
model=model_name,
max_model_len=4096,
max_num_seqs=2,
mm_processor_kwargs={
"size": {"shortest_edge": 12544, "longest_edge": 47040000},
"fps": 1,
},
limit_mm_per_prompt={modality: 1},
enforce_eager=True,
tensor_parallel_size=4,
)
if modality == "image":
placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
elif modality == "video":
placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
prompts = [
(
"[gMASK]<sop><|system|>\nYou are a helpful assistant.<|user|>\n"
f"{placeholder}"
f"{question}<|assistant|>assistant\n"
)
for question in questions
]
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# GLM-4.5V-FP8
def run_glm4_5v_fp8(questions: list[str], modality: str) -> ModelRequestData:
model_name = "zai-org/GLM-4.5V-FP8"
engine_args = EngineArgs(
model=model_name,
max_model_len=4096,
max_num_seqs=2,
mm_processor_kwargs={
"size": {"shortest_edge": 12544, "longest_edge": 47040000},
"fps": 1,
},
limit_mm_per_prompt={modality: 1},
enforce_eager=True,
tensor_parallel_size=4,
)
if modality == "image":
placeholder = "<|begin_of_image|><|image|><|end_of_image|>"
elif modality == "video":
placeholder = "<|begin_of_video|><|video|><|end_of_video|>"
prompts = [
(
"[gMASK]<sop><|system|>\nYou are a helpful assistant.<|user|>\n"
f"{placeholder}"
f"{question}<|assistant|>assistant\n"
)
for question in questions
]
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# H2OVL-Mississippi # H2OVL-Mississippi
def run_h2ovl(questions: list[str], modality: str) -> ModelRequestData: def run_h2ovl(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -383,8 +490,8 @@ def run_hyperclovax_seed_vision( ...@@ -383,8 +490,8 @@ def run_hyperclovax_seed_vision(
for question in questions: for question in questions:
if modality == "image": if modality == "image":
""" """
ocr: List the words in the image in raster order. ocr: List the words in the image in raster order.
Even if the word order feels unnatural for reading, Even if the word order feels unnatural for reading,
the model will handle it as long as it follows raster order. the model will handle it as long as it follows raster order.
e.g. "Naver, CLOVA, bigshane" e.g. "Naver, CLOVA, bigshane"
lens_keywords: List the entity names in the image. lens_keywords: List the entity names in the image.
...@@ -693,15 +800,13 @@ def run_llava_next_video(questions: list[str], modality: str) -> ModelRequestDat ...@@ -693,15 +800,13 @@ def run_llava_next_video(questions: list[str], modality: str) -> ModelRequestDat
def run_llava_onevision(questions: list[str], modality: str) -> ModelRequestData: def run_llava_onevision(questions: list[str], modality: str) -> ModelRequestData:
if modality == "video": if modality == "video":
prompts = [ prompts = [
f"<|im_start|>user <video>\n{question}<|im_end|> \ f"<|im_start|>user <video>\n{question}<|im_end|><|im_start|>assistant\n"
<|im_start|>assistant\n"
for question in questions for question in questions
] ]
elif modality == "image": elif modality == "image":
prompts = [ prompts = [
f"<|im_start|>user <image>\n{question}<|im_end|> \ f"<|im_start|>user <image>\n{question}<|im_end|><|im_start|>assistant\n"
<|im_start|>assistant\n"
for question in questions for question in questions
] ]
...@@ -815,6 +920,39 @@ def run_minicpmv(questions: list[str], modality: str) -> ModelRequestData: ...@@ -815,6 +920,39 @@ def run_minicpmv(questions: list[str], modality: str) -> ModelRequestData:
return run_minicpmv_base(questions, modality, "openbmb/MiniCPM-V-2_6") return run_minicpmv_base(questions, modality, "openbmb/MiniCPM-V-2_6")
def run_minimax_vl_01(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"
model_name = "MiniMaxAI/MiniMax-VL-01"
engine_args = EngineArgs(
model=model_name,
max_num_seqs=2,
limit_mm_per_prompt={modality: 1},
trust_remote_code=True,
tensor_parallel_size=8,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
messages = [
[
{
"role": "user",
"content": [{"type": "image"}, {"type": "text", "text": question}],
}
]
for question in questions
]
prompts = tokenizer.apply_chat_template(
messages, add_generation_prompt=True, tokenize=False
)
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# Mistral-3 HF-format # Mistral-3 HF-format
def run_mistral3(questions: list[str], modality: str) -> ModelRequestData: def run_mistral3(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -891,8 +1029,7 @@ def run_molmo(questions: list[str], modality: str) -> ModelRequestData: ...@@ -891,8 +1029,7 @@ def run_molmo(questions: list[str], modality: str) -> ModelRequestData:
) )
prompts = [ prompts = [
f"<|im_start|>user <image>\n{question}<|im_end|> \ f"<|im_start|>user <image>\n{question}<|im_end|><|im_start|>assistant\n"
<|im_start|>assistant\n"
for question in questions for question in questions
] ]
...@@ -998,6 +1135,38 @@ def run_ovis(questions: list[str], modality: str) -> ModelRequestData: ...@@ -998,6 +1135,38 @@ def run_ovis(questions: list[str], modality: str) -> ModelRequestData:
) )
# Ovis2_5
def run_ovis2_5(questions: list[str], modality: str) -> ModelRequestData:
model_name = "AIDC-AI/Ovis2.5-2B"
engine_args = EngineArgs(
model=model_name,
max_model_len=4096,
max_num_seqs=2,
trust_remote_code=True,
dtype="half",
limit_mm_per_prompt={modality: 1},
)
if modality == "image":
placeholder = "<image>"
elif modality == "video":
placeholder = "<video>"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
messages = [
[{"role": "user", "content": f"{placeholder}\n{question}"}]
for question in questions
]
prompts = tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# PaliGemma # PaliGemma
def run_paligemma(questions: list[str], modality: str) -> ModelRequestData: def run_paligemma(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -1297,6 +1466,28 @@ def run_qwen2_5_omni(questions: list[str], modality: str): ...@@ -1297,6 +1466,28 @@ def run_qwen2_5_omni(questions: list[str], modality: str):
) )
# R-4B
def run_r_vl(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"
model_name = "YannQi/R-4B"
prompts = [
f"<|im_start|>user <image>\n{question}<|im_end|><|im_start|>assistant\n"
for question in questions
]
engine_args = EngineArgs(
model=model_name,
max_model_len=16384,
limit_mm_per_prompt={modality: 1},
)
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# SkyworkR1V # SkyworkR1V
def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData: def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -1442,12 +1633,15 @@ model_example_map = { ...@@ -1442,12 +1633,15 @@ model_example_map = {
"chameleon": run_chameleon, "chameleon": run_chameleon,
"command_a_vision": run_command_a_vision, "command_a_vision": run_command_a_vision,
"deepseek_vl_v2": run_deepseek_vl2, "deepseek_vl_v2": run_deepseek_vl2,
"ernie45_vl": run_ernie45_vl,
"florence2": run_florence2, "florence2": run_florence2,
"fuyu": run_fuyu, "fuyu": run_fuyu,
"gemma3": run_gemma3, "gemma3": run_gemma3,
"gemma3n": run_gemma3n, "gemma3n": run_gemma3n,
"glm4v": run_glm4v, "glm4v": run_glm4v,
"glm4_1v": run_glm4_1v, "glm4_1v": run_glm4_1v,
"glm4_5v": run_glm4_5v,
"glm4_5v_fp8": run_glm4_5v_fp8,
"h2ovl_chat": run_h2ovl, "h2ovl_chat": run_h2ovl,
"hyperclovax_seed_vision": run_hyperclovax_seed_vision, "hyperclovax_seed_vision": run_hyperclovax_seed_vision,
"idefics3": run_idefics3, "idefics3": run_idefics3,
...@@ -1463,12 +1657,14 @@ model_example_map = { ...@@ -1463,12 +1657,14 @@ model_example_map = {
"mantis": run_mantis, "mantis": run_mantis,
"minicpmo": run_minicpmo, "minicpmo": run_minicpmo,
"minicpmv": run_minicpmv, "minicpmv": run_minicpmv,
"minimax_vl_01": run_minimax_vl_01,
"mistral3": run_mistral3, "mistral3": run_mistral3,
"mllama": run_mllama, "mllama": run_mllama,
"molmo": run_molmo, "molmo": run_molmo,
"nemotron_vl": run_nemotron_vl, "nemotron_vl": run_nemotron_vl,
"NVLM_D": run_nvlm_d, "NVLM_D": run_nvlm_d,
"ovis": run_ovis, "ovis": run_ovis,
"ovis2_5": run_ovis2_5,
"paligemma": run_paligemma, "paligemma": run_paligemma,
"paligemma2": run_paligemma2, "paligemma2": run_paligemma2,
"phi3_v": run_phi3v, "phi3_v": run_phi3v,
...@@ -1479,6 +1675,7 @@ model_example_map = { ...@@ -1479,6 +1675,7 @@ model_example_map = {
"qwen2_vl": run_qwen2_vl, "qwen2_vl": run_qwen2_vl,
"qwen2_5_vl": run_qwen2_5_vl, "qwen2_5_vl": run_qwen2_5_vl,
"qwen2_5_omni": run_qwen2_5_omni, "qwen2_5_omni": run_qwen2_5_omni,
"rvl": run_r_vl,
"skywork_chat": run_skyworkr1v, "skywork_chat": run_skyworkr1v,
"smolvlm": run_smolvlm, "smolvlm": run_smolvlm,
"step3": run_step3, "step3": run_step3,
......
...@@ -680,6 +680,36 @@ def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData: ...@@ -680,6 +680,36 @@ def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
) )
# ovis2_5
def load_ovis2_5(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "AIDC-AI/Ovis2.5-2B"
engine_args = EngineArgs(
model=model_name,
max_model_len=8192,
max_num_seqs=2,
trust_remote_code=True,
dtype="half",
limit_mm_per_prompt={"image": len(image_urls)},
)
placeholders = "\n".join(
f"Image-{i}: <image>\n" for i, _ in enumerate(image_urls, start=1)
)
messages = [{"role": "user", "content": f"{placeholders}\n{question}"}]
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
prompt = tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=[fetch_image(url) for url in image_urls],
)
def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData: def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "mistral-community/pixtral-12b" model_name = "mistral-community/pixtral-12b"
...@@ -962,6 +992,39 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData: ...@@ -962,6 +992,39 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
) )
def load_r_vl(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "YannQi/R-4B"
engine_args = EngineArgs(
model=model_name,
max_model_len=16384,
max_num_seqs=16,
limit_mm_per_prompt={"image": len(image_urls)},
)
placeholders = [{"type": "image", "image": url} for url in image_urls]
messages = [
{
"role": "user",
"content": [
*placeholders,
{"type": "text", "text": question},
],
}
]
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
prompt = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=[fetch_image(url) for url in image_urls],
)
def load_smolvlm(question: str, image_urls: list[str]) -> ModelRequestData: def load_smolvlm(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "HuggingFaceTB/SmolVLM2-2.2B-Instruct" model_name = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
...@@ -1064,6 +1127,76 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData: ...@@ -1064,6 +1127,76 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData:
) )
# GLM-4.5V
def load_glm4_5v(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "zai-org/GLM-4.5V"
engine_args = EngineArgs(
model=model_name,
max_model_len=32768,
max_num_seqs=2,
limit_mm_per_prompt={"image": len(image_urls)},
enforce_eager=True,
tensor_parallel_size=4,
)
placeholders = [{"type": "image", "image": url} for url in image_urls]
messages = [
{
"role": "user",
"content": [
*placeholders,
{"type": "text", "text": question},
],
}
]
processor = AutoProcessor.from_pretrained(model_name)
prompt = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
image_data = [fetch_image(url) for url in image_urls]
return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=image_data,
)
# GLM-4.5V-FP8
def load_glm4_5v_fp8(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "zai-org/GLM-4.5V-FP8"
engine_args = EngineArgs(
model=model_name,
max_model_len=32768,
max_num_seqs=2,
limit_mm_per_prompt={"image": len(image_urls)},
enforce_eager=True,
tensor_parallel_size=4,
)
placeholders = [{"type": "image", "image": url} for url in image_urls]
messages = [
{
"role": "user",
"content": [
*placeholders,
{"type": "text", "text": question},
],
}
]
processor = AutoProcessor.from_pretrained(model_name)
prompt = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
image_data = [fetch_image(url) for url in image_urls]
return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=image_data,
)
model_example_map = { model_example_map = {
"aria": load_aria, "aria": load_aria,
"aya_vision": load_aya_vision, "aya_vision": load_aya_vision,
...@@ -1085,6 +1218,7 @@ model_example_map = { ...@@ -1085,6 +1218,7 @@ model_example_map = {
"mllama": load_mllama, "mllama": load_mllama,
"NVLM_D": load_nvlm_d, "NVLM_D": load_nvlm_d,
"ovis": load_ovis, "ovis": load_ovis,
"ovis2_5": load_ovis2_5,
"phi3_v": load_phi3v, "phi3_v": load_phi3v,
"phi4_mm": load_phi4mm, "phi4_mm": load_phi4mm,
"phi4_multimodal": load_phi4_multimodal, "phi4_multimodal": load_phi4_multimodal,
...@@ -1092,10 +1226,13 @@ model_example_map = { ...@@ -1092,10 +1226,13 @@ model_example_map = {
"qwen_vl_chat": load_qwen_vl_chat, "qwen_vl_chat": load_qwen_vl_chat,
"qwen2_vl": load_qwen2_vl, "qwen2_vl": load_qwen2_vl,
"qwen2_5_vl": load_qwen2_5_vl, "qwen2_5_vl": load_qwen2_5_vl,
"rvl": load_r_vl,
"smolvlm": load_smolvlm, "smolvlm": load_smolvlm,
"step3": load_step3, "step3": load_step3,
"tarsier": load_tarsier, "tarsier": load_tarsier,
"tarsier2": load_tarsier2, "tarsier2": load_tarsier2,
"glm4_5v": load_glm4_5v,
"glm4_5v_fp8": load_glm4_5v_fp8,
} }
......
...@@ -27,10 +27,12 @@ class BlockStored(KVCacheEvent): ...@@ -27,10 +27,12 @@ class BlockStored(KVCacheEvent):
token_ids: list[int] token_ids: list[int]
block_size: int block_size: int
lora_id: Optional[int] lora_id: Optional[int]
medium: Optional[str]
class BlockRemoved(KVCacheEvent): class BlockRemoved(KVCacheEvent):
block_hashes: list[int] block_hashes: list[int]
medium: Optional[str]
class AllBlocksCleared(KVCacheEvent): class AllBlocksCleared(KVCacheEvent):
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import base64
import os
import requests
# This example shows how to perform an online inference that generates
# multimodal data. In this specific case this example will take a geotiff
# image as input, process it using the multimodal data processor, and
# perform inference.
# Reuirements :
# - install plugin at:
# https://github.com/christian-pinto/prithvi_io_processor_plugin
# - start vllm in serving mode with the below args
# --model='christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM'
# --task embed --trust-remote-code
# --skip-tokenizer-init --enforce-eager
# --io-processor-plugin prithvi_to_tiff_india
def main():
image_url = "https://huggingface.co/christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM/resolve/main/India_900498_S2Hand.tif" # noqa: E501
server_endpoint = "http://localhost:8000/pooling"
request_payload_url = {
"data": {
"data": image_url,
"data_format": "url",
"image_format": "tiff",
"out_data_format": "b64_json",
},
"priority": 0,
"model": "christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
}
ret = requests.post(server_endpoint, json=request_payload_url)
print(f"response.status_code: {ret.status_code}")
print(f"response.reason:{ret.reason}")
response = ret.json()
decoded_image = base64.b64decode(response["data"]["data"])
out_path = os.path.join(os.getcwd(), "online_prediction.tiff")
with open(out_path, "wb") as f:
f.write(decoded_image)
if __name__ == "__main__":
main()
{% if not add_generation_prompt is defined %}
{% set add_generation_prompt = false %}
{% endif %}
{% if not thinking is defined %}
{% set thinking = false %}
{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}
{%- for message in messages %}
{%- if message['role'] == 'system' %}
{%- if ns.is_first_sp %}
{% set ns.system_prompt = ns.system_prompt + message['content'] %}
{% set ns.is_first_sp = false %}
{%- else %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
{%- endif %}
{%- endif %}
{%- endfor %}
{% if tools is defined and tools is not none %}
{% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
{% for tool in tools %}
{% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
{% endfor %}
{% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{{additional_tool_calls}}<|tool▁calls▁end|>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
{% endif %}
{{ bos_token }}{{ ns.system_prompt }}
{%- for message in messages %}
{%- if message['role'] == 'user' %}
{%- set ns.is_tool = false -%}
{%- set ns.is_first = false -%}
{%- set ns.is_last_user = true -%}
{{'<|User|>' + message['content']}}
{%- endif %}
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- if ns.is_last_user %}
{{'<|Assistant|></think>'}}
{%- endif %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_first = false %}
{%- set ns.is_tool = false -%}
{%- for tool in message['tool_calls'] %}
{%- if not ns.is_first %}
{%- if message['content'] is none %}
{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
{%- else %}
{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
{%- endif %}
{%- set ns.is_first = true -%}
{%- else %}
{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
{%- endif %}
{%- endfor %}
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
{%- endif %}
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
{%- if ns.is_last_user %}
{{'<|Assistant|>'}}
{%- if message['prefix'] is defined and message['prefix'] and thinking %}
{{'<think>'}}
{%- else %}
{{'</think>'}}
{%- endif %}
{%- endif %}
{%- set ns.is_last_user = false -%}
{%- if ns.is_tool %}
{{message['content'] + '<|end▁of▁sentence|>'}}
{%- set ns.is_tool = false -%}
{%- else %}
{%- set content = message['content'] -%}
{%- if '</think>' in content %}
{%- set content = content.split('</think>', 1)[1] -%}
{%- endif %}
{{content + '<|end▁of▁sentence|>'}}
{%- endif %}
{%- endif %}
{%- if message['role'] == 'tool' %}
{%- set ns.is_last_user = false -%}
{%- set ns.is_tool = true -%}
{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
{%- endif %}
{%- endfor -%}
{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
{{'<|Assistant|>'}}
{%- if not thinking %}
{{'</think>'}}
{%- else %}
{{'<think>'}}
{%- endif %}
{% endif %}
{#- Begin-of-sequence token to start the model prompt -#}
{{ bos_token }}
{#- Extracts the system message. Gemma does not support system messages so it will be prepended to first user message. -#}
{%- if messages[0]['role'] == 'system' -%}
{%- if messages[0]['content'] is string -%}
{%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
{%- else -%}
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
{%- endif -%}
{%- set loop_messages = messages[1:] -%}
{%- else -%}
{%- set first_user_prefix = "" -%}
{%- set loop_messages = messages -%}
{%- endif -%}
{#- Set tools to none if not defined for this ChatCompletion request (helps avoid errors later) -#}
{%- if not tools is defined %}
{%- set tools = none %}
{%- endif %}
{#- Validate alternating user/assistant messages (excluding 'tool' messages and ones with tool_calls) -#}
{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | selectattr("tool_calls", "undefined") -%}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
{%- endif -%}
{%- endfor -%}
{#- Main loop over all messages in the conversation history -#}
{%- for message in loop_messages -%}
{#- Normalize roles for model prompt formatting -#}
{%- if (message['role'] == 'assistant') -%}
{%- set role = "model" -%}
{%- elif (message['role'] == 'tool') -%}
{%- set role = "user" -%}
{%- else -%}
{%- set role = message['role'] -%}
{%- endif -%}
{#- Mark the start of a message block with the appropriate role -#}
{{ '<start_of_turn>' + role + '\n' -}}
{#- Insert system message content (if present) at the beginning of the first message. -#}
{%- if loop.first -%}
{{ first_user_prefix }}
{#- Append system message with tool information if using tools in message request. -#}
{%- if tools is not none -%}
{{- "Tools (functions) are available. If you decide to invoke one or more of the tools, you must respond with a python list of the function calls.\n" -}}
{{- "Example Format: [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] \n" -}}
{{- "Do not use variables. DO NOT USE MARKDOWN SYNTAX. You SHOULD NOT include any other text in the response if you call a function. If none of the functions can be used, point it out. If you lack the parameters required by the function, also point it out.\n" -}}
{{- "Here is a list of functions in JSON format that you can invoke.\n" -}}
{{- tools | tojson(indent=4) -}}
{{- "\n\n" -}}
{%- endif -%}
{%- endif -%}
{#- Format model tool calls (turns where model indicates they want to call a tool) -#}
{%- if 'tool_calls' in message -%}
{#- Opening bracket for tool call list. -#}
{{- '[' -}}
{#- For each tool call -#}
{%- for tool_call in message.tool_calls -%}
{#- Get tool call function. -#}
{%- if tool_call.function is defined -%}
{%- set tool_call = tool_call.function -%}
{%- endif -%}
{#- Function name & opening parenthesis. -#}
{{- tool_call.name + '(' -}}
{#-- Handle arguments as list (positional) or dict (named) --#}
{#-- Named arguments (dict) --#}
{%- if tool_call.arguments is iterable and tool_call.arguments is mapping -%}
{%- set first = true -%}
{%- for key, val in tool_call.arguments.items() -%}
{%- if not first %}, {% endif -%}
{{ key }}={{ val | tojson }}
{%- set first = false -%}
{%- endfor -%}
{#-- Positional arguments (list) --#}
{%- elif tool_call.arguments is iterable -%}
{{- tool_call.arguments | map('tojson') | join(', ') -}}
{#-- Fallback: single positional value --#}
{%- else -%}
{{- tool_call.arguments | tojson -}}
{#-- Closing parenthesis. --#}
{%- endif -%}
{{- ')' -}}
{#-- If more than one tool call, place comma and move to formatting next tool call --#}
{%- if not loop.last -%}, {% endif -%}
{%- endfor -%}
{#- Closing bracket for tool call list. -#}
{{- ']' -}}
{%- endif -%}
{#- Tool response start tag (for messages from a tool) -#}
{%- if (message['role'] == 'tool') -%}
{{ '<tool_response>\n' -}}
{%- endif -%}
{#- Render the message content: handle plain string or multimodal content like image/text -#}
{%- if message['content'] is string -%}
{{ message['content'] | trim }}
{%- elif message['content'] is iterable -%}
{%- for item in message['content'] -%}
{%- if item['type'] == 'image' -%}
{{ '<start_of_image>' }}
{%- elif item['type'] == 'text' -%}
{{ item['text'] | trim }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{ raise_exception("Invalid content type") }}
{%- endif -%}
{#- Tool response end tag -#}
{%- if (message['role'] == 'tool') -%}
{{ '</tool_response>' -}}
{%- endif -%}
{#- Mark end of a single turn -#}
{{ '<end_of_turn>\n' }}
{%- endfor -%}
{#- If generation is to be triggered, add model prompt prefix -#}
{%- if add_generation_prompt -%}
{{'<start_of_turn>model\n'}}
{%- endif -%}
\ No newline at end of file
{%- if messages and messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content']|trim %}
{%- set messages = messages[1:] %}
{%- else %}
{%- set system_message = "You are a helpful assistant." %}
{%- endif %}
{%- if messages %} {%- if messages %}
{%- if system_message or tools %}
<|system|> <|system|>
{%- if system_message %}
{{ system_message }} {{ system_message }}
{%- endif %} {%- if tools %}
In addition to plain text responses, you can chose to call one or more of the provided functions. In addition to plain text responses, you can chose to call one or more of the provided functions.
Use the following rule to decide when to call a function: Use the following rule to decide when to call a function:
...@@ -19,13 +23,11 @@ If you decide to call functions: ...@@ -19,13 +23,11 @@ If you decide to call functions:
* make sure you pick the right functions that match the user intent * make sure you pick the right functions that match the user intent
{%- if tools %}
{%- for t in tools %} {%- for t in tools %}
{{- t | tojson(indent=4) }} {{- t | tojson(indent=4) }}
{{- "\n\n" }} {{- "\n\n" }}
{%- endfor %} {%- endfor %}
{%- endif %}<|end|> {%- endif %}<|end|>
{%- endif %}
{%- for message in messages %} {%- for message in messages %}
{%- if message.role != "system" %} {%- if message.role != "system" %}
......
{% macro render_extra_keys(json_dict, handled_keys) %}
{%- if json_dict is mapping %}
{%- for json_key in json_dict if json_key not in handled_keys %}
{%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
{%- else %}
{{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
{% endmacro %}
{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}
{%- if not tools is defined %}
{%- set tools = [] %}
{%- endif %}
{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
{%- endif %}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
{%- if tool.description is defined %}
{{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
{%- endif %}
{{- '\n<parameters>' }}
{%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- '\n<parameter>' }}
{{- '\n<name>' ~ param_name ~ '</name>' }}
{%- if param_fields.type is defined %}
{{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
{%- endif %}
{%- if param_fields.description is defined %}
{{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
{%- endif %}
{%- set handled_keys = ['name', 'type', 'description'] %}
{{- render_extra_keys(param_fields, handled_keys) }}
{{- '\n</parameter>' }}
{%- endfor %}
{%- endif %}
{% set handled_keys = ['type', 'properties'] %}
{{- render_extra_keys(tool.parameters, handled_keys) }}
{{- '\n</parameters>' }}
{%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
{{- render_extra_keys(tool, handled_keys) }}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in loop_messages %}
{%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
{{- '<|im_start|>' + message.role }}
{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
{{- '\n' + message.content | trim + '\n' }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- if tool_call.arguments is defined %}
{%- for args_name, args_value in tool_call.arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>\n' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
...@@ -129,15 +129,16 @@ markdown_extensions: ...@@ -129,15 +129,16 @@ markdown_extensions:
- toc: - toc:
permalink: true permalink: true
# For math rendering # For math rendering
- mdx_math: - pymdownx.arithmatex:
enable_dollar_delimiter: true generic: true
extra_css: extra_css:
- mkdocs/stylesheets/extra.css - mkdocs/stylesheets/extra.css
extra_javascript: extra_javascript:
- mkdocs/javascript/run_llm_widget.js - mkdocs/javascript/run_llm_widget.js
- https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML - mkdocs/javascript/mathjax.js
- https://unpkg.com/mathjax@3.2.2/es5/tex-mml-chtml.js
- mkdocs/javascript/edit_and_feedback.js - mkdocs/javascript/edit_and_feedback.js
- mkdocs/javascript/slack_and_forum.js - mkdocs/javascript/slack_and_forum.js
......
...@@ -24,13 +24,14 @@ classifiers = [ ...@@ -24,13 +24,14 @@ classifiers = [
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Intended Audience :: Developers", "Intended Audience :: Developers",
"Intended Audience :: Information Technology", "Intended Audience :: Information Technology",
"Intended Audience :: Science/Research", "Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Scientific/Engineering :: Information Analysis",
] ]
requires-python = ">=3.9,<3.13" requires-python = ">=3.9,<3.14"
dynamic = [ "version", "dependencies", "optional-dependencies"] dynamic = [ "version", "dependencies", "optional-dependencies"]
[project.urls] [project.urls]
......
...@@ -8,3 +8,4 @@ torch==2.5.1 ...@@ -8,3 +8,4 @@ torch==2.5.1
wheel wheel
jinja2>=3.1.6 jinja2>=3.1.6
regex regex
build
...@@ -7,20 +7,21 @@ requests >= 2.26.0 ...@@ -7,20 +7,21 @@ requests >= 2.26.0
tqdm tqdm
blake3 blake3
py-cpuinfo py-cpuinfo
transformers >= 4.55.0 transformers >= 4.55.2
tokenizers >= 0.21.1 # Required for fast incremental detokenization. tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf # Required by LlamaTokenizer. protobuf # Required by LlamaTokenizer.
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp aiohttp
openai >= 1.99.1 # For Responses API with reasoning content openai >= 1.99.1 # For Responses API with reasoning content
pydantic >= 2.10 pydantic >= 2.11.7
prometheus_client >= 0.18.0 prometheus_client >= 0.18.0
pillow # Required for image processing pillow # Required for image processing
prometheus-fastapi-instrumentator >= 7.0.0 prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer >= 0.10.11, < 0.11 lm-format-enforcer == 0.11.3
llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" llguidance >= 0.7.11, < 0.8.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64"
outlines_core == 0.2.10 outlines_core == 0.2.10 ; platform_machine != "s390x"
outlines == 0.1.11 ; platform_machine == "s390x"
# required for outlines backend disk cache # required for outlines backend disk cache
diskcache == 5.6.3 diskcache == 5.6.3
lark == 1.2.2 lark == 1.2.2
...@@ -38,7 +39,7 @@ pyyaml ...@@ -38,7 +39,7 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL. einops # Required for Qwen2-VL.
compressed-tensors == 0.10.2 # required for compressed-tensors compressed-tensors == 0.11.0 # required for compressed-tensors
depyf==0.19.0 # required for profiling and debugging with compilation config depyf==0.19.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files watchfiles # required for http server to monitor the updates of TLS files
......
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.60.0; python_version == '3.9' and platform_machine != "s390x" # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9' numba == 0.61.2; python_version > '3.9' and platform_machine != "s390x"
# Dependencies for CPUs # Dependencies for CPUs
packaging>=24.2 packaging>=24.2
setuptools>=77.0.3,<80.0.0 setuptools>=77.0.3,<80.0.0
--extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://download.pytorch.org/whl/cpu
torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218 torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
torch==2.7.0; platform_system == "Darwin" torch==2.8.0; platform_system == "Darwin"
torch==2.7.0; platform_machine == "ppc64le" torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
torch==2.6.0; platform_machine == "aarch64" # for arm64 CPUs, torch 2.7.0 has a issue: https://github.com/vllm-project/vllm/issues/17960
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x" torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
torchaudio==2.7.0; platform_machine == "ppc64le" torchaudio==2.8.0; platform_machine == "ppc64le"
# required for the image processor of phi3v, this must be updated alongside torch # required for the image processor of phi3v, this must be updated alongside torch
torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
torchvision==0.22.0; platform_machine == "ppc64le" torchvision==0.23.0; platform_machine == "ppc64le"
datasets # for benchmark scripts datasets # for benchmark scripts
# Intel Extension for PyTorch, only for x86_64 CPUs # Intel Extension for PyTorch, only for x86_64 CPUs
......
...@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9' ...@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs # Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.7.1 torch==2.8.0
torchaudio==2.7.1 torchaudio==2.8.0
# These must be updated alongside torch # These must be updated alongside torch
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.32.post1
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7 xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
\ No newline at end of file
...@@ -7,27 +7,12 @@ mkdocs-awesome-nav ...@@ -7,27 +7,12 @@ mkdocs-awesome-nav
mkdocs-glightbox mkdocs-glightbox
mkdocs-git-revision-date-localized-plugin mkdocs-git-revision-date-localized-plugin
mkdocs-minify-plugin mkdocs-minify-plugin
python-markdown-math
regex regex
ruff ruff
# Required for argparse hook only # Required for argparse hook only
-f https://download.pytorch.org/whl/cpu -f https://download.pytorch.org/whl/cpu
cachetools cachetools
cbor2
cloudpickle
fastapi
msgspec msgspec
openai
openai-harmony
partial-json-parser
pillow
psutil
pybase64
pydantic pydantic
setproctitle
torch torch
transformers
zmq
uvloop
prometheus-client
...@@ -27,7 +27,7 @@ mistral_common[image,audio] >= 1.8.2 # required for voxtral test ...@@ -27,7 +27,7 @@ mistral_common[image,audio] >= 1.8.2 # required for voxtral test
num2words # required for smolvlm test num2words # required for smolvlm test
opencv-python-headless >= 4.11.0 # required for video test opencv-python-headless >= 4.11.0 # required for video test
datamodel_code_generator # required for minicpm3 test datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.8 # required for model evaluation test lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
mteb>=1.38.11, <2 # required for mteb test mteb>=1.38.11, <2 # required for mteb test
transformers==4.52.4 transformers==4.52.4
tokenizers==0.21.1 tokenizers==0.21.1
......
# Common dependencies # Common dependencies
-r common.txt -r common.txt
--extra-index-url https://download.pytorch.org/whl/rocm6.2.4 --extra-index-url https://download.pytorch.org/whl/rocm6.3
torch==2.7.0 torch==2.8.0
torchvision==0.22.0 torchvision==0.23.0
torchaudio==2.7.0 torchaudio==2.8.0
triton==3.2 triton==3.3.0
cmake>=3.26.1,<4 cmake>=3.26.1,<4
packaging>=24.2 packaging>=24.2
setuptools>=77.0.3,<80.0.0 setuptools>=77.0.3,<80.0.0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment