#!/bin/bash
# ==============================================================================
# vLLM server launch script (run inside the container)
# Model: openai-mirror/gpt-oss-120b (120B MoE)
#
# Usage: <script> [PORT]
#   PORT  listening port for the OpenAI-compatible API (default: 8000)
# ==============================================================================
set -euo pipefail

# NOTE(review): "{model_PATH}" looks like an unfilled template token (its casing
# is inconsistent with every other variable here) — confirm the deployment
# tooling substitutes it before this script runs, otherwise vllm will receive
# the literal string "{model_PATH}" as the model path.
MODEL_PATH="{model_PATH}"
PORT="${1:-8000}"

# Fail fast on a non-numeric port instead of passing garbage to vllm.
if ! [[ "$PORT" =~ ^[0-9]+$ ]]; then
  printf 'invalid port argument: %s\n' "$PORT" >&2
  exit 2
fi

echo "=========================================="
echo "vLLM Server 启动配置"
echo "=========================================="
echo "模型路径: $MODEL_PATH"
echo "服务端口: $PORT"
echo "=========================================="

# ROCm GPU selection: use the first four HIP devices for tensor parallelism.
export HIP_VISIBLE_DEVICES=0,1,2,3
# NOTE(review): MOE_NN=0 is not a documented vLLM env var — presumably a
# vendor/fork-specific MoE kernel toggle; confirm it is still required.
export MOE_NN=0

# exec replaces the shell so the container runtime's signals (SIGTERM on stop)
# reach the vllm process directly instead of the wrapper shell.
exec vllm serve "$MODEL_PATH" \
  --host 0.0.0.0 \
  --port "$PORT" \
  --tensor-parallel-size 4 \
  --gpu-memory-utilization 0.90 \
  --max-model-len 4096 \
  --enforce-eager \
  --enable-chunked-prefill \
  --max-num-batched-tokens 8192