# Launch a vLLM server for the ServiceNow-AI/Apriel-1.6-15b-Thinker model.
#
# Flags, grouped by concern:
#   model loading
#     --trust-remote-code        permit custom model code shipped with the
#                                model repository to run
#     --dtype bfloat16           run the model in bfloat16 precision
#   context sizing
#     --max-model-len 32768      maximum context length (in tokens) to serve
#     --max-seq-len-to-capture 32768
#                                kept equal to --max-model-len
#                                NOTE(review): this flag appears to have been
#                                deprecated in newer vLLM releases -- confirm
#                                it is still accepted by the installed version
#   parallelism
#     -tp 1                      tensor-parallel size of 1
vllm serve ServiceNow-AI/Apriel-1.6-15b-Thinker \
  --trust-remote-code \
  --dtype bfloat16 \
  --max-model-len 32768 \
  --max-seq-len-to-capture 32768 \
  -tp 1