Commit f6a338d7 — "Initial commit", authored by jerrrrry.
# Official SourceFind (DCU) base image with vLLM + DTK toolchain
FROM image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.8.5-ubuntu22.04-dtk25.04.1-rc4-das1.6-py3.10-20250620-fixpy

# Install base diagnostic tools; clean the apt lists in the same layer to keep
# the image small. `file` is added because the verification RUN below uses it.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        dmidecode \
        file \
        git \
        ipmitool \
        iproute2 \
        iputils-ping \
        jq \
        locate \
        lshw \
        pciutils \
        sysstat \
    && rm -rf /var/lib/apt/lists/*

# Create the directory layout expected by the check script.
RUN mkdir -p /workspace/scripts \
             /workspace/test/env_check_outputs \
             /workspace/test/env_check_tools

# Copy the check script and the tool archives.
COPY ./scripts/run_envcheck.sh /workspace/scripts/
COPY ./env_check_tools/dcu_env_check.zip /workspace/test/env_check_tools/
COPY ./env_check_tools/rccl-tests.zip /workspace/test/env_check_tools/

# Sanity-check the script (listing, file type, shebang line).
RUN ls -l /workspace/scripts/ && \
    file /workspace/scripts/run_envcheck.sh && \
    head -n 1 /workspace/scripts/run_envcheck.sh

WORKDIR /workspace/scripts/

# Exec-form CMD so the process receives container signals directly
# (the original nested shell-form `bash -c "bash …"` added a useless layer
# of shells and broke signal delivery).
CMD ["bash", "run_envcheck.sh"]
\ No newline at end of file
#!/bin/bash
set -eo pipefail # strict error handling
# All per-check logs are written under this directory.
log_dir="/workspace/test/env_check_outputs/"
mkdir -p "$log_dir"
echo "==================== 开始系统环境检查 ===================="
# Enhanced check helpers - continue on errors instead of aborting
run_test() {
# Run a plain command, teeing combined stdout/stderr to "$log_dir/<name>.log".
#   $1 = short English id (documentation only)
#   $2 = display/log-file name
#   remaining args = the command and its arguments
local name=$1
local chinese_name=$2
shift 2
echo "[RUN] $chinese_name"
if ! "$@" 2>&1 | tee "$log_dir/${chinese_name}.log"; then
echo "[WARN] $chinese_name 检查失败" | tee -a "$log_dir/${chinese_name}.log"
return 1 # non-zero status, but do not exit the script
fi
return 0
}
run_pipe_test() {
# Like run_test, but $3 is a full shell command string (may contain pipes
# or multiple statements) executed via `bash -c`.
local name=$1
local chinese_name=$2
local cmd=$3
echo "[RUN] $chinese_name"
if ! bash -c "$cmd" 2>&1 | tee "$log_dir/${chinese_name}.log"; then
echo "[WARN] $chinese_name 检查失败" | tee -a "$log_dir/${chinese_name}.log"
return 1
fi
return 0
}
# Safe executor - log a warning instead of aborting when a command fails
safe_run() {
# $1 = section banner; remaining args = command strings run via eval.
local section=$1
shift
echo "==================== $section ===================="
for cmd in "$@"; do
# eval so that quoted command strings are parsed correctly
if ! eval "$cmd"; then
echo "[WARN] 命令执行失败: $cmd" | tee -a "$log_dir/error.log"
fi
done
}
# ------------------------- 1. System basics -------------------------
safe_run "1.系统基础信息检查" \
'run_test uname "01_系统内核信息" uname -a' \
'run_test os_release "02_操作系统版本" cat /etc/os-release' \
'run_test locale "03_系统语言环境" locale'
# ------------------------- 2. CPU & memory -------------------------
safe_run "2.CPU_内存检查" \
'run_test cpu_info "04_CPU详细信息" lscpu' \
'run_test cpu_cores "05_CPU核心数" nproc' \
'run_pipe_test cpu_freq "06_CPU频率" "cat /proc/cpuinfo | grep \"MHz\" | sort -u"' \
'run_test memory_usage "07_内存使用情况" free -h' \
'run_test vm_stat "08_系统整体CPU和内存使用情况" vmstat 1 10' \
'run_test numa_nodes "09_NUMA节点信息" numactl --hardware || true' \
'run_pipe_test cpu_usage "10_CPU利用率检查" "mpstat -P ALL 1 5"' \
'run_pipe_test cpu_top_usage "11_CPU占用最高进程检查" "ps -eo pid,%cpu,cmd --sort=-%cpu | head -n 10"'
# ------------------------- 3. Storage devices -------------------------
safe_run "3.存储设备检查" \
'run_test disk_usage "12_磁盘使用情况" df -hT' \
'run_test mount_info "13_挂载信息" mount | column -t' \
'run_test block_devices "14_块设备信息" lsblk -o NAME,SIZE,TYPE,MOUNTPOINT'
# ------------------------- 4. Network -------------------------
safe_run "4.网络检查" \
'run_test netstat "15_网络连接状态" ss -tulnp' \
'run_test network_interfaces "16_网络接口信息" ip -br a' \
'run_test routing_table "17_路由表信息" ip route' \
'run_test arp_table "18_ARP表信息" ip neigh' \
'run_test ibdev2netdev "19_InfiniBand设备映射" ibdev2netdev' \
'run_test topo "20_网卡-dcu-topo" lspci -vt '
# ------------------------- 5. DCU, kernel & driver -------------------------
safe_run "5.DCU_内核_驱动检查" \
'run_test hy_smi "21_DCU设备状态" hy-smi' \
'run_test clock_level "22_DCU时钟级别" hy-smi -g' \
'run_test driverversion "23_DCU驱动版本" hy-smi --showdriverversion' \
'run_test rocminfo "24_ROCM信息" rocminfo' \
'run_test kernel_modules "25_已加载内核模块" lsmod' \
'run_test kernel_version "26_内核版本" uname -r'
# ------------------------- 6. Software stack -------------------------
safe_run "6.软件栈检查" \
'run_test pip_list "27_Python包列表" pip list' \
'run_test glibc_version "28_GLIBC版本" ldd --version'
# ------------------------- 7. Other hardware status -------------------------
safe_run "7.其他硬件状态检查" \
'run_test lspci "29_PCI设备列表" lspci' \
'run_test iostat "30_IO统计信息" iostat' \
'run_test hardware_info "31_硬件摘要信息" lshw -short || true' \
'run_pipe_test ACS_stat "32_ACS状态检查" "lspci -vvv | grep -i acsct"' \
'run_test dmesg "33_内核日志" dmesg' \
'run_pipe_test pcie_topology "34_PCIe拓扑结构" "echo \"====== PCIe 桥接器 ======\"; lspci -vvv | grep -E \"PCI bridge|Root port\" -A 20 | grep -E \"Device|Vendor|LnkSta:|LnkCap:|Secondary bus\"; echo \"\"; echo \"====== PCIe 带宽汇总 ======\"; lspci -vvv | grep \"LnkSta:\" | sort | uniq -c"' \
'run_pipe_test storage_details "35_存储控制器详情" "echo \"====== 存储控制器 ======\"; lspci -vvv | grep -E \"NVMe|SATA|RAID|Storage controller\" -A 30 | grep -E \"Device|Vendor|Kernel driver|LnkSta:|Speed|Width|MSI|Bar Memory\""' \
'run_pipe_test nic_details "36_网卡详细信息" "echo \"====== 网卡详细信息 ======\"; lspci -vvv | grep -E \"Ethernet controller|Network controller|InfiniBand\" -A 50 | grep -E \"Device|Vendor|Subsystem|Kernel driver|Kernel modules|LnkSta:|LnkCap:|NUMA node|Speed|Width\""' \
'run_pipe_test iommu_stat "37_IOMMU状态" "dmesg | grep IOMMU"' \
'run_pipe_test SELinux_stat "38_SELinux状态" "dmesg | grep SELinux"'
# ------------------------- 8. Bandwidth checks -------------------------
# Load the DTK toolchain environment (provides BandwidthTest and rccl paths).
source /opt/dtk/env.sh
# NOTE(review): the `exit 1` statements inside the eval'd strings below run in
# the current shell, so a failure here terminates the whole script despite
# safe_run's continue-on-error intent -- confirm whether that is desired.
safe_run "8.带宽检查" \
'run_test D2D-a_test "39_D2D单向带宽测试" /opt/dtk/bin/BandwidthTest -a -s 512MB ' \
'run_test D2D-A_test "40_D2D双向带宽测试" /opt/dtk/bin/BandwidthTest -A -s 512MB ' \
'run_test D2H-H2D_test "41_D2H和H2D带宽测试" /opt/dtk/bin/BandwidthTest -t 3 ' \
'cd /workspace/test/env_check_tools/ || { echo "[ERROR] 无法进入/workspace/test/env_check_tools/目录"; exit 1; }' \
'if [ -f "rccl-tests.zip" ]; then
echo "[INFO] 发现 rccl-tests.zip,开始解压..."
unzip -o rccl-tests.zip -d rccl-tests || {
echo "[ERROR] rccl-tests.zip 解压失败" | tee "$log_dir/42_RCCL测试解压失败.log"
exit 1
}
cd rccl-tests/rccl-tests || { echo "[ERROR] 无法进入rccl-tests目录"; exit 1; }
if make MPI=1 MPI_HOME=/opt/mpi ROCM_HOME=/opt/dtk NCCL_HOME=/opt/dtk/rccl \
CUSTOM_RCCL_LIB=/opt/dtk/rccl/lib/librccl.so -j32; then
./build/all_reduce_perf -b 8 -e 1G -f 2 -g 8 2>&1 | tee "$log_dir/43_RCCL_all_reduce_8卡测试.log" || true
./build/all_reduce_perf -b 4 -e 1G -f 2 -g 4 2>&1 | tee "$log_dir/44_RCCL_all_reduce_4卡测试.log" || true
else
echo "[ERROR] RCCL编译失败" | tee "$log_dir/45_RCCL编译失败.log"
fi
cd ../..
else
echo "[WARN] 未找到 rccl-tests.zip,跳过 RCCL 测试" | tee "$log_dir/46_RCCL测试跳过.log"
fi'
# ------------------------- 9. DCU environment check -------------------------
# NOTE(review): as in section 8, the `exit 1` statements inside the eval'd
# string run in the current shell and terminate the whole script on failure.
safe_run "9.DCU环境检查" \
'cd /workspace/test/env_check_tools/ || { echo "[ERROR] 无法进入/workspace/test/env_check_tools/目录"; exit 1; }' \
'if [ -f "dcu_env_check.zip" ]; then
echo "[INFO] 发现 dcu_env_check.zip,开始解压..."
unzip -o dcu_env_check.zip -d dcu_env_check || {
echo "[ERROR] dcu_env_check.zip 解压失败" | tee "$log_dir/47_DCU环境检查解压失败.log"
exit 1
}
chmod +x dcu_env_check/dcu_env_check-main/tools/*
cd dcu_env_check/dcu_env_check-main && {
bash system_check.sh 2>&1 | tee "$log_dir/48_DCU环境检查结果.log" || true
cp system_info* /workspace/test/env_check_outputs/ || true
cd ../..
} || {
echo "[ERROR] DCU环境检查执行失败" | tee "$log_dir/49_DCU环境检查执行失败.log"
}
else
echo "[WARN] 未找到 dcu_env_check.zip,跳过 DCU 环境检查" | tee "$log_dir/50_DCU环境检查跳过.log"
fi'
echo "==================== 检查完成 ===================="
echo "所有日志已保存至: $log_dir"
ls -lh "$log_dir"
\ No newline at end of file
# Build the env_check image, then run it with the host DCU (hyhal) stacks
# mounted read-only; check results land in ./outputs/env_check_outputs.
# NOTE(review): --privileged already grants all devices and capabilities, so
# the --cap-add/--device flags below are redundant -- confirm before trimming.
docker build -t env_check . && \
docker run \
-v /usr/local/hyhal:/usr/local/hyhal:ro \
-v /opt/hyhal:/opt/hyhal:ro \
-v $PWD/outputs/env_check_outputs:/workspace/test/env_check_outputs/ \
--ipc=host \
--network=host \
--cap-add=SYS_PTRACE \
--group-add video \
--ulimit memlock=-1:-1 \
--privileged \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
--shm-size=500G \
-u root \
--security-opt seccomp=unconfined \
env_check
\ No newline at end of file
# Official SourceFind (DCU) base image with vLLM + DTK toolchain
FROM image.sourcefind.cn:5000/dcu/admin/base/custom:vllm0.8.5-ubuntu22.04-dtk25.04.1-rc4-das1.6-py3.10-20250620-fixpy

# Install base diagnostic tools; clean the apt lists in the same layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        dmidecode \
        git \
        ipmitool \
        iproute2 \
        iputils-ping \
        jq \
        locate \
        lshw \
        pciutils \
        sysstat \
    && rm -rf /var/lib/apt/lists/*

# Create the directory layout expected by the pipeline scripts.
RUN mkdir -p /workspace/scripts \
             /workspace/configs \
             /workspace/test/env_check_outputs \
             /workspace/test/inference_outputs \
             /workspace/test/models \
             /workspace/test/env_check_tools

# Copy scripts, configs and tool archives.
COPY ./scripts/* /workspace/scripts/
COPY ./configs/* /workspace/configs/
COPY ./env_check_tools/dcu_env_check.zip /workspace/test/env_check_tools/
COPY ./env_check_tools/rccl-tests.zip /workspace/test/env_check_tools/

# Only the scripts need the execute bit; configs are plain data files.
# (The original `chmod +x /workspace/configs*` used a broken glob that only
# touched the /workspace/configs directory itself, not its contents.)
RUN chmod +x /workspace/scripts/*

WORKDIR /workspace/scripts/

# Exec-form CMD so the shell process receives container signals directly.
CMD ["bash", "entrypoint.sh"]
\ No newline at end of file
# 格式: 模型ID;本地保存路径
#模型ID为modelscope官网指定的id
Qwen/Qwen3-0.6B;/workspace/test/models/Qwen/Qwen3-0.6B
\ No newline at end of file
# 格式:model_name;model_path;tp;data_type;batch_list;prompt_completion_pairs;max_model_len;gpu_memory_utilization
# 每行一个模型配置,字段用分号分隔
Qwen3-0.6B;/workspace/test/models/Qwen/Qwen3-0.6B;1;float16;"1 ";(512 512,512 1024);40000;0.95
\ No newline at end of file
# SPDX-License-Identifier: Apache-2.0
import json
import os
import sys
import time
import traceback
from dataclasses import dataclass, field
from typing import Optional, Union
import aiohttp
import huggingface_hub.constants
from tqdm.asyncio import tqdm
from transformers import (AutoTokenizer, PreTrainedTokenizer,
PreTrainedTokenizerFast)
# NOTE(simon): do not import vLLM here so the benchmark script
# can run without vLLM installed.
# Generous overall client timeout (6 hours) so long benchmark runs don't abort.
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
@dataclass
class RequestFuncInput:
    """Input for a single benchmark request to an inference backend."""
    prompt: str
    api_url: str
    prompt_len: int  # prompt length in tokens
    output_len: int  # requested number of output tokens
    model: str
    model_name: Optional[str] = None  # overrides `model` in the payload when set
    logprobs: Optional[int] = None
    extra_body: Optional[dict] = None  # extra payload fields merged into the request
    multi_modal_content: Optional[dict] = None  # appended to chat message content
    ignore_eos: bool = False  # ask the server to generate exactly output_len tokens
@dataclass
class RequestFuncOutput:
    """Result of a single benchmark request."""
    generated_text: str = ""
    success: bool = False
    latency: float = 0.0  # total request latency in seconds
    output_tokens: int = 0
    ttft: float = 0.0  # Time to first token
    itl: list[float] = field(
        default_factory=list)  # list of inter-token latencies
    tpot: float = 0.0  # avg next-token latencies
    prompt_len: int = 0
    error: str = ""  # formatted traceback or HTTP reason when success is False
async def async_request_tgi(
    request_func_input: RequestFuncInput,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Send one streaming request to a TGI server and collect latency stats.

    Args:
        request_func_input: Prompt, endpoint URL and generation parameters.
        pbar: Optional progress bar advanced by one on completion.

    Returns:
        RequestFuncOutput with TTFT, inter-token latencies, total latency and
        the generated text; ``success`` is False on HTTP/transport errors.
    """
    api_url = request_func_input.api_url
    assert api_url.endswith("generate_stream")

    async with aiohttp.ClientSession(trust_env=True,
                                     timeout=AIOHTTP_TIMEOUT) as session:
        params = {
            "max_new_tokens": request_func_input.output_len,
            "do_sample": True,
            "temperature": 0.01,  # TGI does not accept 0.0 temperature.
            "top_p": 0.99,  # TGI does not accept 1.0 top_p.
            "truncate": request_func_input.prompt_len,
            "ignore_eos_token": request_func_input.ignore_eos,
        }
        payload = {
            "inputs": request_func_input.prompt,
            "parameters": params,
        }
        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len
        if request_func_input.ignore_eos:
            # With ignore_eos the server generates exactly output_len tokens.
            output.output_tokens = request_func_input.output_len
        else:
            # NOTE(review): None conflicts with the declared int type of
            # RequestFuncOutput.output_tokens; presumably the caller fills
            # this in later by re-tokenizing -- confirm.
            output.output_tokens = None

        ttft = 0.0
        st = time.perf_counter()
        most_recent_timestamp = st
        try:
            async with session.post(url=api_url, json=payload) as response:
                if response.status == 200:
                    async for chunk_bytes in response.content:
                        chunk_bytes = chunk_bytes.strip()
                        if not chunk_bytes:
                            continue
                        chunk_bytes = chunk_bytes.decode("utf-8")

                        # NOTE: Sometimes TGI returns a ping response without
                        # any data, we should skip it.
                        if chunk_bytes.startswith(":"):
                            continue
                        chunk = chunk_bytes.removeprefix("data:")

                        data = json.loads(chunk)
                        timestamp = time.perf_counter()
                        # First token
                        if ttft == 0.0:
                            ttft = time.perf_counter() - st
                            output.ttft = ttft

                        # Decoding phase
                        else:
                            output.itl.append(timestamp -
                                              most_recent_timestamp)

                        most_recent_timestamp = timestamp

                    output.latency = most_recent_timestamp - st
                    output.success = True
                    # `data` holds the final streamed event at this point.
                    output.generated_text = data["generated_text"]
                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
            output.error = "".join(traceback.format_exception(*exc_info))

    if pbar:
        pbar.update(1)
    return output
async def async_request_trt_llm(
    request_func_input: RequestFuncInput,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Send one streaming request to a TensorRT-LLM (Triton) endpoint.

    Args:
        request_func_input: Prompt, endpoint URL and generation parameters.
        pbar: Optional progress bar advanced by one on completion.

    Returns:
        RequestFuncOutput with TTFT, inter-token latencies, total latency and
        the accumulated generated text; ``success`` is False on errors.
    """
    api_url = request_func_input.api_url
    assert api_url.endswith("generate_stream")

    async with aiohttp.ClientSession(trust_env=True,
                                     timeout=AIOHTTP_TIMEOUT) as session:
        payload = {
            "accumulate_tokens": True,
            "text_input": request_func_input.prompt,
            "temperature": 0.0,
            "top_p": 1.0,
            "max_tokens": request_func_input.output_len,
            "stream": True,
        }
        if request_func_input.ignore_eos:
            # Pin the minimum length so exactly output_len tokens are produced.
            payload["min_length"] = request_func_input.output_len
        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len

        ttft = 0.0
        st = time.perf_counter()
        most_recent_timestamp = st
        try:
            async with session.post(url=api_url, json=payload) as response:
                if response.status == 200:
                    async for chunk_bytes in response.content:
                        chunk_bytes = chunk_bytes.strip()
                        if not chunk_bytes:
                            continue

                        chunk = chunk_bytes.decode("utf-8").removeprefix(
                            "data:")

                        data = json.loads(chunk)
                        output.generated_text += data["text_output"]
                        timestamp = time.perf_counter()
                        # First token
                        if ttft == 0.0:
                            ttft = timestamp - st
                            output.ttft = ttft

                        # Decoding phase
                        else:
                            output.itl.append(timestamp -
                                              most_recent_timestamp)

                        most_recent_timestamp = timestamp

                    output.latency = most_recent_timestamp - st
                    output.success = True

                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
            output.error = "".join(traceback.format_exception(*exc_info))

    if pbar:
        pbar.update(1)
    return output
async def async_request_deepspeed_mii(
    request_func_input: RequestFuncInput,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Send one (non-streaming) request to a DeepSpeed-MII server.

    Args:
        request_func_input: Prompt, endpoint URL and generation parameters.
        pbar: Optional progress bar advanced by one on completion.

    Returns:
        RequestFuncOutput with total latency and generated text. TTFT is 0
        because MII does not stream. ``success`` is False on HTTP/transport
        errors or on an unrecognized response shape.
    """
    async with aiohttp.ClientSession(trust_env=True,
                                     timeout=AIOHTTP_TIMEOUT) as session:
        payload = {
            "prompt": request_func_input.prompt,
            "max_tokens": request_func_input.output_len,
            "temperature": 0.01,  # deepspeed-mii does not accept 0.0 temp.
            "top_p": 1.0,
        }
        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len

        # NOTE: DeepSpeed-MII doesn't support streaming as of Jan 28 2024,
        # will use 0 as placeholder.
        # See https://github.com/microsoft/DeepSpeed-MII/pull/311
        output.ttft = 0

        st = time.perf_counter()
        try:
            async with session.post(url=request_func_input.api_url,
                                    json=payload) as response:
                if response.status == 200:
                    parsed_resp = await response.json()
                    output.latency = time.perf_counter() - st
                    if "choices" in parsed_resp:
                        output.generated_text = parsed_resp["choices"][0][
                            "text"]
                        output.success = True
                    elif "text" in parsed_resp:
                        output.generated_text = parsed_resp["text"][0]
                        output.success = True
                    else:
                        # BUG FIX: success was previously set to True
                        # unconditionally after this branch, masking the
                        # unexpected-format error.
                        output.error = ("Unexpected response format: "
                                        "neither 'choices' nor 'text' found")
                        output.success = False
                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
            output.error = "".join(traceback.format_exception(*exc_info))

    if pbar:
        pbar.update(1)
    return output
async def async_request_openai_completions(
    request_func_input: RequestFuncInput,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Send one streaming request to an OpenAI-compatible /completions API.

    Streams chunks to measure TTFT and inter-token latencies, and reads the
    final usage record (enabled via stream_options.include_usage) for the
    output token count. ``success`` is False if no token chunk arrived.
    """
    api_url = request_func_input.api_url
    assert api_url.endswith(
        ("completions", "profile")
    ), "OpenAI Completions API URL must end with 'completions' or 'profile'."

    async with aiohttp.ClientSession(trust_env=True,
                                     timeout=AIOHTTP_TIMEOUT) as session:
        payload = {
            "model": request_func_input.model_name \
                if request_func_input.model_name else request_func_input.model,
            "prompt": request_func_input.prompt,
            "temperature": 0.0,
            "max_tokens": request_func_input.output_len,
            "logprobs": request_func_input.logprobs,
            "stream": True,
            "stream_options": {
                "include_usage": True,
            },
        }
        if request_func_input.ignore_eos:
            payload["ignore_eos"] = request_func_input.ignore_eos
        if request_func_input.extra_body:
            payload.update(request_func_input.extra_body)
        headers = {
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
        }

        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len

        generated_text = ""
        st = time.perf_counter()
        most_recent_timestamp = st
        try:
            async with session.post(url=api_url, json=payload,
                                    headers=headers) as response:
                if response.status == 200:
                    first_chunk_received = False
                    async for chunk_bytes in response.content:
                        chunk_bytes = chunk_bytes.strip()
                        if not chunk_bytes:
                            continue

                        chunk = chunk_bytes.decode("utf-8").removeprefix(
                            "data: ")
                        if chunk != "[DONE]":
                            data = json.loads(chunk)

                            # NOTE: Some completion API might have a last
                            # usage summary response without a token so we
                            # want to check a token was generated
                            if choices := data.get("choices"):
                                # Note that text could be empty here
                                # e.g. for special tokens
                                text = choices[0].get("text")
                                timestamp = time.perf_counter()
                                # First token
                                if not first_chunk_received:
                                    first_chunk_received = True
                                    ttft = time.perf_counter() - st
                                    output.ttft = ttft

                                # Decoding phase
                                else:
                                    output.itl.append(timestamp -
                                                      most_recent_timestamp)

                                most_recent_timestamp = timestamp
                                generated_text += text or ""
                            elif usage := data.get("usage"):
                                output.output_tokens = usage.get(
                                    "completion_tokens")
                    if first_chunk_received:
                        output.success = True
                    else:
                        output.success = False
                        output.error = (
                            "Never received a valid chunk to calculate TTFT."
                            "This response will be marked as failed!")
                    output.generated_text = generated_text
                    output.latency = most_recent_timestamp - st
                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
            output.error = "".join(traceback.format_exception(*exc_info))
    if pbar:
        pbar.update(1)
    return output
async def async_request_openai_chat_completions(
    request_func_input: RequestFuncInput,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    """Send one streaming request to an OpenAI-compatible /chat/completions API.

    Supports optional multi-modal message content. Measures TTFT and
    inter-token latencies from the streamed delta chunks and reads the usage
    record for the output token count.
    """
    api_url = request_func_input.api_url
    assert api_url.endswith(
        ("chat/completions", "profile")
    ), "OpenAI Chat Completions API URL must end with 'chat/completions'."

    async with aiohttp.ClientSession(trust_env=True,
                                     timeout=AIOHTTP_TIMEOUT) as session:
        content = [{"type": "text", "text": request_func_input.prompt}]
        if request_func_input.multi_modal_content:
            content.append(request_func_input.multi_modal_content)
        payload = {
            "model": request_func_input.model_name \
                if request_func_input.model_name else request_func_input.model,
            "messages": [
                {
                    "role": "user",
                    "content": content
                },
            ],
            "temperature": 0.0,
            "max_completion_tokens": request_func_input.output_len,
            "stream": True,
            "stream_options": {
                "include_usage": True,
            },
        }
        if request_func_input.ignore_eos:
            payload["ignore_eos"] = request_func_input.ignore_eos
        if request_func_input.extra_body:
            payload.update(request_func_input.extra_body)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
        }

        output = RequestFuncOutput()
        output.prompt_len = request_func_input.prompt_len

        generated_text = ""
        ttft = 0.0
        st = time.perf_counter()
        most_recent_timestamp = st
        try:
            async with session.post(url=api_url, json=payload,
                                    headers=headers) as response:
                if response.status == 200:
                    async for chunk_bytes in response.content:
                        chunk_bytes = chunk_bytes.strip()
                        if not chunk_bytes:
                            continue

                        chunk = chunk_bytes.decode("utf-8").removeprefix(
                            "data: ")
                        if chunk != "[DONE]":
                            timestamp = time.perf_counter()
                            data = json.loads(chunk)

                            if choices := data.get("choices"):
                                content = choices[0]["delta"].get("content")
                                # First token
                                if ttft == 0.0:
                                    ttft = timestamp - st
                                    output.ttft = ttft

                                # Decoding phase
                                else:
                                    output.itl.append(timestamp -
                                                      most_recent_timestamp)

                                generated_text += content or ""
                            elif usage := data.get("usage"):
                                output.output_tokens = usage.get(
                                    "completion_tokens")

                            # NOTE(review): advanced for every non-[DONE]
                            # chunk, including usage-only chunks -- confirm
                            # that is intended for latency accounting.
                            most_recent_timestamp = timestamp

                    output.generated_text = generated_text
                    output.success = True
                    output.latency = most_recent_timestamp - st
                else:
                    output.error = response.reason or ""
                    output.success = False
        except Exception:
            output.success = False
            exc_info = sys.exc_info()
            output.error = "".join(traceback.format_exception(*exc_info))

    if pbar:
        pbar.update(1)
    return output
def get_model(pretrained_model_name_or_path: str) -> str:
    """Resolve a model ID to a local path, downloading via ModelScope when
    the VLLM_USE_MODELSCOPE environment variable is set to 'true'.
    """
    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() != 'true':
        return pretrained_model_name_or_path

    from modelscope import snapshot_download
    from vllm.model_executor.model_loader.weight_utils import get_lock

    # Use file lock to prevent multiple processes from
    # downloading the same model weights at the same time.
    with get_lock(pretrained_model_name_or_path):
        return snapshot_download(
            model_id=pretrained_model_name_or_path,
            local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
            ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
def get_tokenizer(
    pretrained_model_name_or_path: str,
    tokenizer_mode: str = "auto",
    trust_remote_code: bool = False,
    **kwargs,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Load a tokenizer, resolving remote model IDs to local paths first."""
    if pretrained_model_name_or_path is not None and not os.path.exists(
            pretrained_model_name_or_path):
        pretrained_model_name_or_path = get_model(
            pretrained_model_name_or_path)

    if tokenizer_mode == "slow":
        # A fast tokenizer contradicts slow mode; reject the combination.
        if kwargs.get("use_fast", False):
            raise ValueError(
                "Cannot use the fast tokenizer in slow tokenizer mode.")
        kwargs["use_fast"] = False

    if tokenizer_mode != "mistral":
        return AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )

    # Mistral mode needs vLLM's tokenizer wrapper; import lazily so the
    # benchmark can run without vLLM for the other modes.
    try:
        from vllm.transformers_utils.tokenizer import MistralTokenizer
    except ImportError as e:
        raise ImportError("MistralTokenizer requires vllm package.\n"
                          "Please install it with `pip install vllm` "
                          "to use mistral tokenizer mode.") from e
    return MistralTokenizer.from_pretrained(
        str(pretrained_model_name_or_path))
# Registry mapping backend name -> async request function used by the
# benchmark driver.
ASYNC_REQUEST_FUNCS = {
    "tgi": async_request_tgi,
    "vllm": async_request_openai_completions,
    "lmdeploy": async_request_openai_completions,
    "deepspeed-mii": async_request_deepspeed_mii,
    "openai": async_request_openai_completions,
    "openai-chat": async_request_openai_chat_completions,
    "tensorrt-llm": async_request_trt_llm,
    "scalellm": async_request_openai_completions,
    "sglang": async_request_openai_completions,
}

# Backend names that speak the OpenAI-compatible completions/chat API.
OPENAI_COMPATIBLE_BACKENDS = [
    k for k, v in ASYNC_REQUEST_FUNCS.items()
    if v in (async_request_openai_completions,
             async_request_openai_chat_completions)
]
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import math
import os
from typing import Any
def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
                                        metrics: dict[str, list],
                                        extra_info: dict[str, Any]) -> list:
    """
    Save the benchmark results in the format used by PyTorch OSS benchmark with
    on metric per record
    https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
    """
    # Opt-in via environment variable; otherwise emit nothing.
    if not os.environ.get("SAVE_TO_PYTORCH_BENCHMARK_FORMAT", False):
        return []

    records = []
    for metric_name, values in metrics.items():
        record = {
            "benchmark": {
                "name": "vLLM benchmark",
                "extra_info": {
                    "args": vars(args),
                },
            },
            "model": {
                "name": args.model,
            },
            "metric": {
                "name": metric_name,
                "benchmark_values": values,
                "extra_info": extra_info,
            },
        }

        args_info = record["benchmark"]["extra_info"]["args"]
        # Save tensor_parallel_size parameter if it's part of the metadata
        # but missing (or falsy) in the CLI args.
        if not args_info.get("tensor_parallel_size") and \
                "tensor_parallel_size" in extra_info:
            args_info["tensor_parallel_size"] = (
                extra_info["tensor_parallel_size"])
        records.append(record)

    return records
class InfEncoder(json.JSONEncoder):
    """JSON encoder that serializes infinite floats as the string "inf"."""

    def clear_inf(self, o: Any):
        # Recursively walk containers, mapping any infinite float (positive
        # or negative) to the string "inf"; everything else passes through.
        if isinstance(o, float):
            return "inf" if math.isinf(o) else o
        if isinstance(o, dict):
            return {key: self.clear_inf(value) for key, value in o.items()}
        if isinstance(o, list):
            return [self.clear_inf(item) for item in o]
        return o

    def iterencode(self, o: Any, *args, **kwargs) -> Any:
        # Sanitize the whole object tree before delegating to the base encoder.
        return super().iterencode(self.clear_inf(o), *args, **kwargs)
def write_to_json(filename: str, records: list) -> None:
    """Write `records` to `filename` as JSON, encoding inf values as "inf"."""
    payload = json.dumps(records, cls=InfEncoder)
    with open(filename, "w") as out_file:
        out_file.write(payload)
#!/bin/bash
# ModelScope CLI batch download script
# Usage: ./ms_download.sh -f model_list.cfg [-F to force re-download]
pip install modelscope -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Argument parsing
CONFIG_FILE=""
FORCE_DOWNLOAD=false
MODELSCOPE_CMD="modelscope download"
while getopts "f:F" opt; do
case $opt in
f) CONFIG_FILE="$OPTARG" ;;
F) FORCE_DOWNLOAD=true ;;
*) echo "Usage: $0 -f config.cfg [-F]" >&2
exit 1
esac
done
# Validate the config file
if [ ! -f "$CONFIG_FILE" ]; then
echo "Error: Config file $CONFIG_FILE not found!" >&2
exit 1
fi
# Ensure the modelscope CLI is available
if ! command -v modelscope &> /dev/null; then
echo "Error: modelscope CLI not installed. Please install with: pip install modelscope" >&2
exit 1
fi
# 读取配置文件
TOTAL=0
SUCCESS=0
FAILED=0
echo "=== Starting batch download ==="
while IFS=';' read -r model_id local_dir || [[ -n "$model_id" ]]; do
# 跳过空行和注释
[[ -z "$model_id" || "$model_id" =~ ^# ]] && continue
((TOTAL++))
# 清理变量
model_id=$(echo "$model_id" | xargs)
local_dir=$(echo "$local_dir" | xargs)
echo -e "\n[Progress] $TOTAL. Downloading $model_id"
echo "[Location] $local_dir"
# 检查目录是否存在
if [ "$FORCE_DOWNLOAD" = false ] && [ -d "$local_dir" ]; then
echo "[Status] Skipped (already exists)"
((SUCCESS++))
continue
fi
# 创建目录
mkdir -p "$local_dir" || {
echo "[Error] Failed to create directory $local_dir" >&2
((FAILED++))
continue
}
# 执行下载命令
if $MODELSCOPE_CMD --model "$model_id" --local_dir "$local_dir"; then
echo "[Status] Download successful"
((SUCCESS++))
else
echo "[Error] Download failed" >&2
((FAILED++))
# 删除空目录防止残留
rmdir "$local_dir" 2>/dev/null
fi
done < "$CONFIG_FILE"
# 结果统计
echo -e "\n=== Download summary ==="
echo "Total: $TOTAL"
echo "Success: $SUCCESS"
echo "Failed: $FAILED"
# 退出状态
if [ "$FAILED" -gt 0 ]; then
exit 1
else
exit 0
fi
\ No newline at end of file
#!/bin/bash
# Pipeline entrypoint: environment check -> model download -> benchmark.
# Run the environment check
echo "==================== 开始系统环境检查 ===================="
/workspace/scripts/run_envcheck.sh
# Download the models listed in the config
echo "==================== 开始模型下载 ===================="
/workspace/scripts/download_model.sh -f /workspace/configs/download-list.cfg
# Run the performance tests
echo "==================== 开始性能测试 ===================="
/workspace/scripts/run_benchmark.sh
echo "==================== 所有测试完成 ===================="
\ No newline at end of file
#!/bin/bash
# Benchmark driver: for each model listed in ../configs/model_to_test.cfg,
# generate a vLLM server launch script, start the server, wait until it is
# healthy (or fails / times out), run ./test.sh against it, then tear it down.
# Initialize output directories
mkdir -p /workspace/test/inference_outputs/results
mkdir -p /workspace/test/inference_outputs/logs/server
mkdir -p /workspace/test/inference_outputs/logs/models
# Base port; each model gets the next port
BASE_PORT=8001
# Read the config file (semicolon-separated fields)
while IFS=';' read -r model_name model_path tp data_type batch_list prompt_pairs max_model_len gpu_mem_util; do
# Clean fields (strip whitespace and quotes)
model_name=$(echo "$model_name" | xargs)
model_path=$(echo "$model_path" | xargs)
tp=$(echo "$tp" | xargs)
data_type=$(echo "$data_type" | xargs)
batch_list=$(echo "$batch_list" | tr -d '"' | xargs)
prompt_pairs=$(echo "$prompt_pairs" | tr -d '()"' | xargs)
max_model_len=$(echo "$max_model_len" | xargs)
gpu_mem_util=$(echo "$gpu_mem_util" | xargs)
# Allocate a dedicated port for this model
port=$((BASE_PORT++))
# Generate the per-model server launch script (server.sh).
# NOTE(review): HIP_VISIBLE_DEVICES is hard-coded to 8 devices regardless of
# the configured tensor-parallel size -- confirm this is intended.
cat > "server_${model_name}_tp${tp}.sh" <<EOF
#!/bin/bash
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export NCCL_MIN_NCHANNELS=16
export NCCL_MAX_NCHANNELS=16
export VLLM_PCIE_USE_CUSTOM_ALLREDUCE=1
export VLLM_USE_TRITON_PREFIX_FLASH_ATTN=1
export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=0
export VLLM_RANK1_NUMA=1
export VLLM_RANK2_NUMA=2
export VLLM_RANK3_NUMA=3
export VLLM_RANK4_NUMA=4
export VLLM_RANK5_NUMA=5
export VLLM_RANK6_NUMA=6
export VLLM_RANK7_NUMA=7
vllm serve "$model_path" --trust-remote-code \\
--enable-prefix-caching \\
--dtype $data_type \\
--tensor-parallel-size $tp \\
--max-model-len $max_model_len \\
--port $port \\
--gpu-memory-utilization $gpu_mem_util
EOF
# Make the generated script executable
chmod +x "server_${model_name}_tp${tp}.sh"
echo "Generated server script for ${model_name}_tp${tp} at server_${model_name}_tp${tp}.sh"
# 1. Start the vLLM server in the background, logging to server.log
./server_${model_name}_tp${tp}.sh > "/workspace/test/inference_outputs/logs/server/${model_name}_tp${tp}_server.log" 2>&1 &
SERVER_PID=$!
# 2. Log-based server status probe
check_server_status() {
# Returns 0 once the success banner appears in the log, 1 on a matched
# error pattern or a dead server process, 2 to keep waiting.
local log_file=$1
local server_pid=$2
local success_msg="Starting vLLM API server on http://0.0.0.0"
local error_patterns=("RuntimeError" "ValueError" "segmentation fault" "core dumped" )
# Look for the success banner
if grep -q "$success_msg" "$log_file"; then
echo "✅ Server started successfully!"
return 0
fi
# Look for known error patterns
for pattern in "${error_patterns[@]}"; do
if grep -i -q "$pattern" "$log_file"; then
echo -e "\n❌ [$(date '+%Y-%m-%d %H:%M:%S')] Error detected in logs (pattern: $pattern)!"
echo "===== ERROR CONTEXT ====="
grep -i -A 5 -B 5 "$pattern" "$log_file" | tail -n 20
echo "========================="
return 1
fi
done
# Is the server process still alive?
if ! kill -0 $server_pid 2>/dev/null; then
echo -e "\n❌ [$(date '+%Y-%m-%d %H:%M:%S')] Server process died unexpectedly!"
echo "===== LAST LOG LINES ====="
tail -n 20 "$log_file"
echo "========================="
return 1
fi
# Default: keep waiting
return 2
}
# 3. Wait for the server to start or fail
echo -e "\n🔍 [$(date '+%Y-%m-%d %H:%M:%S')] Starting monitoring for ${model_name}_tp${tp} (PID: $SERVER_PID)"
max_wait_seconds=20000
start_time=$(date +%s)
log_file="/workspace/test/inference_outputs/logs/server/${model_name}_tp${tp}_server.log"
while true; do
sleep 20 # poll every 20 seconds
check_server_status "$log_file" "$SERVER_PID"
status=$?
# Success
if [ $status -eq 0 ]; then
break
fi
# Failure
if [ $status -eq 1 ]; then
# Clean up resources
kill $SERVER_PID 2>/dev/null
pkill -f "vllm serve" 2>/dev/null
echo "🛑 Cleaned up resources after failure"
# Skip straight to the next model (continue the outer read loop)
continue 2
fi
# Timeout check
current_time=$(date +%s)
elapsed=$((current_time - start_time))
if [ $elapsed -ge $max_wait_seconds ]; then
echo -e "\n⏰ [$(date '+%Y-%m-%d %H:%M:%S')] Timeout waiting for server to start!"
# Clean up resources
kill $SERVER_PID 2>/dev/null
pkill -f "vllm serve" 2>/dev/null
echo "🛑 Cleaned up resources after timeout"
# Skip straight to the next model (continue the outer read loop)
continue 2
fi
echo "Waiting... (${elapsed}s elapsed)"
done
# 4. Run the tests only after a successful start
echo -e "\n🚀 [$(date '+%Y-%m-%d %H:%M:%S')] Running tests for ${model_name}_tp${tp}..."
# Export the test parameters for test.sh
export MODEL_NAME="$model_name"
export MODEL_PATH="$model_path"
export TP="$tp"
export DATA_TYPE="$data_type"
export BATCH_LIST="$batch_list"
export PROMPT_PAIRS="$prompt_pairs"
export PORT="$port"
# Run the tests (expects cwd = /workspace/scripts)
./test.sh
# 5. Clean up after the tests
kill $SERVER_PID
pkill -f "vllm serve" 2>/dev/null
echo "✅ [$(date '+%Y-%m-%d %H:%M:%S')] ${model_name}_tp${tp} test completed and cleaned up"
done < <(grep -v '^#' ../configs/model_to_test.cfg | grep -v '^$')
echo -e "\n📊 [$(date '+%Y-%m-%d %H:%M:%S')] All tests completed. Results saved to results/"
\ No newline at end of file
#!/bin/bash
set -eo pipefail  # strict error handling: abort on unhandled errors and failed pipelines
# Every individual check writes one log file under this directory.
log_dir="/workspace/test/env_check_outputs/"
mkdir -p "$log_dir"
echo "==================== 开始系统环境检查 ===================="
# Enhanced check helpers -- log a failure and keep going instead of aborting
# Run a check command, mirroring its combined stdout/stderr to a per-check log.
#   $1 - short identifier (unused; kept for call-site compatibility)
#   $2 - base name of the log file written under $log_dir
#   $3.. - the command and its arguments
# Returns 0 when the command succeeds, 1 on failure (never aborts the script;
# with the global pipefail, a failing command is detected through the tee pipe).
run_test() {
    local name="$1" chinese_name="$2"
    shift 2
    echo "[RUN] $chinese_name"
    if "$@" 2>&1 | tee "$log_dir/${chinese_name}.log"; then
        return 0
    fi
    echo "[WARN] $chinese_name 检查失败" | tee -a "$log_dir/${chinese_name}.log"
    return 1
}
# Run a shell command *string* (may contain pipes/redirection) via `bash -c`,
# mirroring its combined output to a per-check log.
#   $1 - short identifier (unused; kept for call-site compatibility)
#   $2 - base name of the log file written under $log_dir
#   $3 - the command string to execute
# Returns 0 on success, 1 on failure (never aborts the script).
run_pipe_test() {
    local name="$1" chinese_name="$2" cmd="$3"
    echo "[RUN] $chinese_name"
    if bash -c "$cmd" 2>&1 | tee "$log_dir/${chinese_name}.log"; then
        return 0
    fi
    echo "[WARN] $chinese_name 检查失败" | tee -a "$log_dir/${chinese_name}.log"
    return 1
}
# Run each argument (a shell snippet) under one section banner.  A snippet
# that fails is recorded in error.log and the remaining snippets still run,
# so a single broken check cannot kill the whole section.
safe_run() {
    local section="$1"
    shift
    echo "==================== $section ===================="
    local cmd
    for cmd in "$@"; do
        # eval is needed so quoting inside each snippet is honored
        if eval "$cmd"; then
            continue
        fi
        echo "[WARN] 命令执行失败: $cmd" | tee -a "$log_dir/error.log"
    done
}
# ------------------------- 1. System basics -------------------------
safe_run "1.系统基础信息检查" \
'run_test uname "01_系统内核信息" uname -a' \
'run_test os_release "02_操作系统版本" cat /etc/os-release' \
'run_test locale "03_系统语言环境" locale'
# ------------------------- 2. CPU & memory -------------------------
safe_run "2.CPU_内存检查" \
'run_test cpu_info "04_CPU详细信息" lscpu' \
'run_test cpu_cores "05_CPU核心数" nproc' \
'run_pipe_test cpu_freq "06_CPU频率" "cat /proc/cpuinfo | grep \"MHz\" | sort -u"' \
'run_test memory_usage "07_内存使用情况" free -h' \
'run_test vm_stat "08_系统整体CPU和内存使用情况" vmstat 1 10' \
'run_test numa_nodes "09_NUMA节点信息" numactl --hardware || true' \
'run_pipe_test cpu_usage "10_CPU利用率检查" "mpstat -P ALL 1 5"' \
'run_pipe_test cpu_top_usage "11_CPU占用最高进程检查" "ps -eo pid,%cpu,cmd --sort=-%cpu | head -n 10"'
# ------------------------- 3. Storage devices -------------------------
# NOTE(review): check 13 pipes run_test's *own* output through `column -t`
# (the pipe is outside the run_test call once eval'd); run_pipe_test with the
# pipe inside the string was likely intended -- confirm.
safe_run "3.存储设备检查" \
'run_test disk_usage "12_磁盘使用情况" df -hT' \
'run_test mount_info "13_挂载信息" mount | column -t' \
'run_test block_devices "14_块设备信息" lsblk -o NAME,SIZE,TYPE,MOUNTPOINT'
# ------------------------- 4. Network -------------------------
safe_run "4.网络检查" \
'run_test netstat "15_网络连接状态" ss -tulnp' \
'run_test network_interfaces "16_网络接口信息" ip -br a' \
'run_test routing_table "17_路由表信息" ip route' \
'run_test arp_table "18_ARP表信息" ip neigh' \
'run_test ibdev2netdev "19_InfiniBand设备映射" ibdev2netdev' \
'run_test topo "20_网卡-dcu-topo" lspci -vt '
# ------------------------- 5. DCU / kernel / driver -------------------------
safe_run "5.DCU_内核_驱动检查" \
'run_test hy_smi "21_DCU设备状态" hy-smi' \
'run_test clock_level "22_DCU时钟级别" hy-smi -g' \
'run_test driverversion "23_DCU驱动版本" hy-smi --showdriverversion' \
'run_test rocminfo "24_ROCM信息" rocminfo' \
'run_test kernel_modules "25_已加载内核模块" lsmod' \
'run_test kernel_version "26_内核版本" uname -r'
# ------------------------- 6. Software stack -------------------------
safe_run "6.软件栈检查" \
'run_test pip_list "27_Python包列表" pip list' \
'run_test glibc_version "28_GLIBC版本" ldd --version'
# ------------------------- 7. Other hardware state -------------------------
safe_run "7.其他硬件状态检查" \
'run_test lspci "29_PCI设备列表" lspci' \
'run_test iostat "30_IO统计信息" iostat' \
'run_test hardware_info "31_硬件摘要信息" lshw -short || true' \
'run_pipe_test ACS_stat "32_ACS状态检查" "lspci -vvv | grep -i acsct"' \
'run_test dmesg "33_内核日志" dmesg' \
'run_pipe_test pcie_topology "34_PCIe拓扑结构" "echo \"====== PCIe 桥接器 ======\"; lspci -vvv | grep -E \"PCI bridge|Root port\" -A 20 | grep -E \"Device|Vendor|LnkSta:|LnkCap:|Secondary bus\"; echo \"\"; echo \"====== PCIe 带宽汇总 ======\"; lspci -vvv | grep \"LnkSta:\" | sort | uniq -c"' \
'run_pipe_test storage_details "35_存储控制器详情" "echo \"====== 存储控制器 ======\"; lspci -vvv | grep -E \"NVMe|SATA|RAID|Storage controller\" -A 30 | grep -E \"Device|Vendor|Kernel driver|LnkSta:|Speed|Width|MSI|Bar Memory\""' \
'run_pipe_test nic_details "36_网卡详细信息" "echo \"====== 网卡详细信息 ======\"; lspci -vvv | grep -E \"Ethernet controller|Network controller|InfiniBand\" -A 50 | grep -E \"Device|Vendor|Subsystem|Kernel driver|Kernel modules|LnkSta:|LnkCap:|NUMA node|Speed|Width\""' \
'run_pipe_test iommu_stat "37_IOMMU状态" "dmesg | grep IOMMU"' \
'run_pipe_test SELinux_stat "38_SELinux状态" "dmesg | grep SELinux"'
# ------------------------- 8. Bandwidth checks -------------------------
# DTK environment (BandwidthTest, rccl paths) must be sourced first.
source /opt/dtk/env.sh
# NOTE(review): the quoted snippets below execute `cd` and `exit 1` via
# safe_run's eval, i.e. in the *current* shell: a failing cd/unzip aborts the
# entire script (defeating safe_run's continue-on-error intent), and a
# successful cd changes the working directory for all later sections -- confirm
# this is intended.
safe_run "8.带宽检查" \
'run_test D2D-a_test "39_D2D单向带宽测试" /opt/dtk/bin/BandwidthTest -a -s 512MB ' \
'run_test D2D-A_test "40_D2D双向带宽测试" /opt/dtk/bin/BandwidthTest -A -s 512MB ' \
'run_test D2H-H2D_test "41_D2H和H2D带宽测试" /opt/dtk/bin/BandwidthTest -t 3 ' \
'cd /workspace/test/env_check_tools/ || { echo "[ERROR] 无法进入/workspace/test/env_check_tools/目录"; exit 1; }' \
'if [ -f "rccl-tests.zip" ]; then
echo "[INFO] 发现 rccl-tests.zip,开始解压..."
unzip -o rccl-tests.zip -d rccl-tests || {
echo "[ERROR] rccl-tests.zip 解压失败" | tee "$log_dir/42_RCCL测试解压失败.log"
exit 1
}
cd rccl-tests/rccl-tests || { echo "[ERROR] 无法进入rccl-tests目录"; exit 1; }
if make MPI=1 MPI_HOME=/opt/mpi ROCM_HOME=/opt/dtk NCCL_HOME=/opt/dtk/rccl \
CUSTOM_RCCL_LIB=/opt/dtk/rccl/lib/librccl.so -j32; then
./build/all_reduce_perf -b 8 -e 1G -f 2 -g 8 2>&1 | tee "$log_dir/43_RCCL_all_reduce_8卡测试.log" || true
./build/all_reduce_perf -b 4 -e 1G -f 2 -g 4 2>&1 | tee "$log_dir/44_RCCL_all_reduce_4卡测试.log" || true
else
echo "[ERROR] RCCL编译失败" | tee "$log_dir/45_RCCL编译失败.log"
fi
cd ../..
else
echo "[WARN] 未找到 rccl-tests.zip,跳过 RCCL 测试" | tee "$log_dir/46_RCCL测试跳过.log"
fi'
# ------------------------- 9. DCU environment check -------------------------
safe_run "9.DCU环境检查" \
'cd /workspace/test/env_check_tools/ || { echo "[ERROR] 无法进入/workspace/test/env_check_tools/目录"; exit 1; }' \
'if [ -f "dcu_env_check.zip" ]; then
echo "[INFO] 发现 dcu_env_check.zip,开始解压..."
unzip -o dcu_env_check.zip -d dcu_env_check || {
echo "[ERROR] dcu_env_check.zip 解压失败" | tee "$log_dir/47_DCU环境检查解压失败.log"
exit 1
}
chmod +x dcu_env_check/dcu_env_check-main/tools/*
cd dcu_env_check/dcu_env_check-main && {
bash system_check.sh 2>&1 | tee "$log_dir/48_DCU环境检查结果.log" || true
cp system_info* /workspace/test/env_check_outputs/ || true
cd ../..
} || {
echo "[ERROR] DCU环境检查执行失败" | tee "$log_dir/49_DCU环境检查执行失败.log"
}
else
echo "[WARN] 未找到 dcu_env_check.zip,跳过 DCU 环境检查" | tee "$log_dir/50_DCU环境检查跳过.log"
fi'
# Final summary: show where every log landed.
echo "==================== 检查完成 ===================="
echo "所有日志已保存至: $log_dir"
ls -lh "$log_dir"
\ No newline at end of file
#!/bin/bash
# Benchmark driver: reads model/test parameters from environment variables
# (exported by the calling script), then runs benchmark_serving.py for every
# batch size x prompt/completion pair and records one CSV row per run.
# Pin all eight DCUs and the vLLM / NCCL tuning knobs for this benchmark run.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export NCCL_MIN_NCHANNELS=16
export NCCL_MAX_NCHANNELS=16
export VLLM_PCIE_USE_CUSTOM_ALLREDUCE=1
export VLLM_USE_TRITON_PREFIX_FLASH_ATTN=1
# Bind each tensor-parallel rank to its own NUMA node (rank N -> node N).
export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=0
export VLLM_RANK1_NUMA=1
export VLLM_RANK2_NUMA=2
export VLLM_RANK3_NUMA=3
export VLLM_RANK4_NUMA=4
export VLLM_RANK5_NUMA=5
export VLLM_RANK6_NUMA=6
export VLLM_RANK7_NUMA=7
# Read parameters from the environment (set by the calling script).
model_name=${MODEL_NAME}
model_path=${MODEL_PATH}
tp=${TP}
data_type=${DATA_TYPE}
batch_list=${BATCH_LIST}
prompt_pairs=${PROMPT_PAIRS}
port=${PORT}
# Build the result file path and write the CSV header (truncates old results).
result_file="/workspace/test/inference_outputs/results/${model_name}_tp${tp}.csv"
echo "tp,data_type,batch,prompt_tokens,completion_tokens,TOTAL_THROUGHPUT(toks/s),generate_throughput(toks/s),TTFT(ms),TPOT(ms),ITL(ms)" > "$result_file"
# Split the space-separated batch list and comma-separated prompt-pair list
# into arrays; each pair is "prompt_tokens completion_tokens".
IFS=' ' read -ra batches <<< "$batch_list"
IFS=',' read -ra pairs <<< "$prompt_pairs"
# Run one benchmark per (batch, prompt-pair) combination.
for batch in "${batches[@]}"; do
for pair in "${pairs[@]}"; do
IFS=' ' read -r prompt_tokens completion_tokens <<< "$pair"
log_file="/workspace/test/inference_outputs/logs/models/${model_name}_${tp}/batch_${batch}_prompt_${prompt_tokens}_completion_${completion_tokens}.log"
mkdir -p "$(dirname "$log_file")"
echo "Running: batch=$batch, prompt=$prompt_tokens, completion=$completion_tokens"
python benchmark_serving.py \
--backend openai \
--port "$port" \
--model "$model_path" \
--trust-remote-code \
--dataset-name random \
--ignore-eos \
--random-input-len "$prompt_tokens" \
--random-output-len "$completion_tokens" \
--num-prompts "$batch" \
2>&1 | tee "$log_file"
# Extract metrics from the benchmark log.
# NOTE(review): these greps/awk column indices assume the exact summary-line
# format printed by benchmark_serving.py ("Total Token ...", "Mean TTFT ...");
# a format change upstream silently yields empty CSV fields -- confirm.
TOTAL_THROUGHPUT=$(grep "^Total Token" "$log_file" | awk '{print $5}')
GEN_THROUGHPUT=$(grep "^Output token" "$log_file" | awk '{print $5}')
TTFT=$(grep "^Mean TTFT" "$log_file" | awk '{print $4}')
TPOT=$(grep "^Mean TPOT" "$log_file" | awk '{print $4}')
ITL=$(grep "^Mean ITL" "$log_file" | awk '{print $4}')
echo "$tp,$data_type,$batch,$prompt_tokens,$completion_tokens,$TOTAL_THROUGHPUT,$GEN_THROUGHPUT,$TTFT,$TPOT,$ITL" >> "$result_file"
done
done
# Build the test image, then run it with the host mounts and devices the DCU
# stack needs.  Fixes: "$PWD" is quoted so host paths containing spaces do not
# word-split, and the dangling trailing "\" after the image name is removed
# (it silently continued the command into whatever line came next).
# NOTE(review): --privileged already grants all devices and capabilities, so
# the explicit --device/--cap-add flags are redundant but kept for clarity.
docker build -t vllm-test1 . && \
docker run \
-v /usr/local/hyhal:/usr/local/hyhal:ro \
-v /opt/hyhal:/opt/hyhal:ro \
-v "$PWD/outputs/env_check_outputs":/workspace/test/env_check_outputs/ \
-v "$PWD/outputs/models":/workspace/test/models/ \
-v "$PWD/outputs/inference_outputs":/workspace/test/inference_outputs/ \
--ipc=host \
--network=host \
--cap-add=SYS_PTRACE \
--group-add video \
--ulimit memlock=-1:-1 \
--privileged \
--device=/dev/kfd \
--device=/dev/mkfd \
--device=/dev/dri \
--shm-size=500G \
-u root \
--security-opt seccomp=unconfined \
vllm-test1
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment