run_vllm_server.sh 865 Bytes
Newer Older
dengjb's avatar
update  
dengjb committed
1
2
3
4
5
6
7
8
9
#!/bin/bash

# ==============================================================================
# vLLM server launch script (runs inside the container)
# Model: openai-mirror/gpt-oss-120b (120B MoE)
#
# Usage: run_vllm_server.sh [PORT]
#   PORT  port the server listens on (default: 8000)
# ==============================================================================

# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

dengjb's avatar
update  
dengjb committed
10
# Model location handed to `vllm serve` as its positional argument.
# NOTE(review): "{model_PATH}" looks like an unfilled template placeholder;
# presumably it is substituted with the real model directory before the
# script is run — confirm, otherwise vLLM receives the literal text.
MODEL_PATH="{model_PATH}"
dengjb's avatar
update  
dengjb committed
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Listening port: first positional argument, falling back to 8000 when the
# argument is missing or empty.
PORT="${1:-8000}"

# Print the effective configuration before starting the server.
echo "=========================================="
echo "vLLM Server 启动配置"
echo "=========================================="
echo "模型路径: $MODEL_PATH"
echo "服务端口: $PORT"
echo "=========================================="

# Expose four devices (indices 0-3) to the HIP runtime; the count matches
# --tensor-parallel-size below.
export HIP_VISIBLE_DEVICES=0,1,2,3
# NOTE(review): MOE_NN is not explained anywhere in this script; presumably a
# platform-specific MoE kernel switch — confirm against the vLLM build in use.
export MOE_NN=0

# Start the server, bound to all interfaces. Both expansions are quoted so a
# model path containing whitespace or glob characters reaches vllm as a single
# argument instead of being word-split or expanded by the shell.
vllm serve "$MODEL_PATH" \
    --host 0.0.0.0 \
    --port "$PORT" \
    --tensor-parallel-size 4 \
    --gpu-memory-utilization 0.90 \
    --max-model-len 4096 \
    --enforce-eager \
    --enable-chunked-prefill \
    --max-num-batched-tokens 8192