#!/bin/bash
# RealWorldQA Inference Script (Thinking Model)
# This script runs inference on the RealWorldQA dataset using vLLM with thinking-mode parameters.

python run_realworldqa.py infer \
    --model-path /path/to/Qwen3-VL-Thinking \
    --dataset RealWorldQA \
    --data-dir /path/to/data \
    --output-file results/RealWorldQA_results_thinking.jsonl \
    --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.9 \
    --max-model-len 128000 \
    --max-images-per-prompt 10 \
    --max-new-tokens 32768 \
    --temperature 0.6 \
    --top-p 0.95 \
    --top-k 20 \
    --repetition-penalty 1.0 \
    --presence-penalty 0.0
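
# Usage sketch (the script filename below is illustrative, not part of the original):
#   bash run_realworldqa_thinking.sh
# The sampling values above (temperature 0.6, top-p 0.95, top-k 20) match the settings
# commonly recommended for Qwen3 thinking mode; adjust them if your model card differs.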