Unverified commit 34868b10, authored by Roger Wang and committed by GitHub
Browse files

[Doc] Update Mistral Small 3.1/Pixtral example (#15184)


Signed-off-by: Roger Wang <ywang@roblox.com>
parent 1f16b7fe
......@@ -6,14 +6,16 @@ import argparse
from vllm import LLM
from vllm.sampling_params import SamplingParams
# This script is an offline demo for running Mistral-Small-3
# This script is an offline demo for running Mistral-Small-3.1
#
# If you want to run a server/client setup, please follow this code:
#
# - Server:
#
# ```bash
# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \
# --tokenizer-mode mistral --config-format mistral --load-format mistral \
# --limit-mm-per-prompt 'image=4' --max-model-len 16384
# ```
#
# - Client:
......@@ -51,6 +53,8 @@ def run_simple_demo(args: argparse.Namespace):
llm = LLM(
model=model_name,
tokenizer_mode="mistral",
config_format="mistral",
load_format="mistral",
max_model_len=4096,
max_num_seqs=2,
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
......@@ -91,6 +95,8 @@ def run_advanced_demo(args: argparse.Namespace):
llm = LLM(
model=model_name,
tokenizer_mode="mistral",
config_format="mistral",
load_format="mistral",
limit_mm_per_prompt={"image": max_img_per_msg},
max_model_len=max_img_per_msg * max_tokens_per_img,
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment