#!/usr/bin/env bash
# ==============================================================================
# vLLM inference smoke-test script
# Model:     openai-mirror/gpt-oss-120b (120B MoE)
# Container: gpt-oss-120b-container
# DCU node:  10.16.6.52
#
# Usage: $0 [API_HOST] [MODEL_NAME]
#   API_HOST   - base URL of the vLLM server (default: http://10.16.6.52:8000)
#   MODEL_NAME - model identifier to request (default: openai-mirror/gpt-oss-120b)
# ==============================================================================
# -e: abort on unhandled failure; -u: error on unset vars;
# -o pipefail: a curl failure inside `curl | python3 -m json.tool` propagates
# instead of being masked by json.tool's exit status.
set -euo pipefail

API_HOST="${1:-http://10.16.6.52:8000}"
MODEL_NAME="${2:-openai-mirror/gpt-oss-120b}"

#######################################
# Send one chat-completion request and pretty-print the JSON response.
# Globals:   API_HOST (read), MODEL_NAME (read)
# Arguments: $1 - user prompt text
#            $2 - max_tokens for the completion
#            $3 - sampling temperature
# Outputs:   pretty-printed JSON response on stdout
# Returns:   non-zero if the HTTP request fails (curl -f) or JSON is invalid
#######################################
run_chat_test() {
  local prompt=$1
  local max_tokens=$2
  local temperature=$3
  # -f: treat HTTP 4xx/5xx as a curl failure instead of pretty-printing the
  # error body as if the test had succeeded.
  # NOTE(review): MODEL_NAME/prompt are interpolated into the JSON payload;
  # they must not contain double quotes or backslashes — acceptable for this
  # fixed-prompt smoke test.
  curl -sf "$API_HOST/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
      \"model\": \"$MODEL_NAME\",
      \"messages\": [
        {
          \"role\": \"user\",
          \"content\": \"$prompt\"
        }
      ],
      \"max_tokens\": $max_tokens,
      \"temperature\": $temperature,
      \"stream\": false
    }" | python3 -m json.tool
}

echo "=========================================="
echo "vLLM 推理测试"
echo "=========================================="
echo "API 地址: $API_HOST"
echo "模型名称: $MODEL_NAME"
echo "=========================================="

echo ""
echo "1. 检查服务健康状态..."
# -f: an HTTP error status (e.g. 500) must count as unhealthy, not success.
curl -sf "$API_HOST/health" && echo " ✓ 服务健康" || {
  echo " ✗ 服务未启动或不可用"
  echo "提示: 请先运行 run_vllm_server.sh 启动服务"
  exit 1
}

echo ""
echo "2. 获取模型信息..."
curl -sf "$API_HOST/v1/models" | python3 -m json.tool | head -20 || echo "获取模型信息失败"

echo ""
echo "3. 执行测试推理 - 简单问题..."
run_chat_test "你好,请介绍一下你自己。" 256 0.7

echo ""
echo "4. 执行测试推理 - 代码生成..."
run_chat_test "用Python写一个快速排序算法" 512 0.3

echo ""
echo "=========================================="
echo "推理测试完成"
echo "=========================================="