Commit bc71a5ea authored by zk's avatar zk
Browse files

Add GPU DCU monitor with grouping

parents
node_modules/
data/servers.json
*.log
.env
# GPU/DCU 资源看板
一个面向测试团队共享使用的 GPU/DCU 服务器占用看板。它通过 SSH 定时在服务器上执行 `hy-smi` 或 `nvidia-smi`,集中展示每台机器的卡数、型号、显存占用、算力占用、温度、功耗、空闲/占用/离线状态。
## 推荐使用方式
最推荐的方式是:**在一台能免密 SSH 登录所有服务器的电脑或跳板机上部署一次,然后把网页地址共享给大家。**
这样每个同事不用单独安装,也不用每个人都配置服务器 SSH 密钥。大家只需要访问:
```text
http://部署机器IP:3066
```
## 快速启动
Windows 用户可以双击:
```text
start-windows.bat
```
或者手动启动:
```powershell
npm start
```
默认地址:
```text
http://localhost:3066
```
如果要给同网段同事访问,在部署机器的防火墙放行 `3066` 端口后,让大家访问:
```text
http://你的电脑IP:3066
```
## 使用前提
- 部署机器需要安装 Node.js 18 或更高版本。
- 部署机器需要能通过 SSH 免密登录目标服务器。
- 海光服务器上需要能执行 `hy-smi`。
- NVIDIA 服务器上需要能执行 `nvidia-smi`。
建议先在部署机器上验证:
```powershell
ssh root@10.0.0.12 hy-smi
ssh root@10.0.0.13 nvidia-smi
```
## 功能
- 网页端添加、编辑、删除服务器。
- 自动识别卡数。
- 自动识别显卡型号:
- 海光:通过 `hy-smi --showproductname` 识别,例如 `BW100`、`BW150`,未来新型号也会按输出自动提取。
- NVIDIA:通过 `nvidia-smi --query-gpu=name` 识别。
- 同时展示显存占用和算力占用。
- 显存泄漏或 vLLM 服务未释放显存时,即使算力为 0,也会判定该卡不可用。
- 主界面水位色块按占用率从下往上填充。
- 占用颜色分级:绿色、黄绿色、橘色、红色。
- 默认每 10 秒自动采集一次,也可以点“手动刷新”立即刷新。
## 配置说明
网页里点击“添加服务器”即可写入配置。真实配置保存在:
```text
data/servers.json
```
该文件已被 `.gitignore` 忽略,避免把真实服务器地址提交到仓库。示例配置见:
```text
data/servers.sample.json
```
服务器支持 `group` 分组字段,适合公共部署时按项目或客户线管理,例如“通信中兴组”“政府联想组”“企业浪潮组”“金融华三组”“深度组”。页面会自动生成分组筛选入口;`tags` 仍然可以用于补充“8卡”“回归”“临时”等标签。
## 环境变量
```powershell
$env:PORT=3066
$env:POLL_INTERVAL_MS=10000
$env:SSH_TIMEOUT_MS=8000
npm start
```
常用配置:
- `PORT`:网页端口,默认 `3066`。
- `POLL_INTERVAL_MS`:自动采集间隔,默认 `10000` 毫秒。
- `SSH_TIMEOUT_MS`:单台服务器 SSH 超时,默认 `8000` 毫秒。
- `SSH_PATH`:自定义 SSH 程序路径。Windows 默认使用 `C:\Windows\System32\OpenSSH\ssh.exe`。
## 上传到 GitHub
首次上传:
```powershell
git init
git add .
git commit -m "Initial GPU DCU monitor"
git branch -M main
git remote add origin https://github.com/你的用户名/你的仓库名.git
git push -u origin main
```
后续更新:
```powershell
git add .
git commit -m "Update dashboard"
git push
```
## 给同事一键使用
最简单的方式:
1. 在 GitHub 页面点击 `Code` -> `Download ZIP`
2. 解压 ZIP。
3. 双击 `start-windows.bat`
4. 打开或分享 `http://localhost:3066` / `http://部署机器IP:3066`
更推荐的团队方式:
1. 由你在一台固定机器上运行 `start-windows.bat`
2. 你在网页里添加所有服务器。
3. 同事只访问 `http://部署机器IP:3066`
如果你想做到真正的“无需安装 Node.js,双击一个 exe 就能运行”,后续可以再做 Windows 打包版。常见路线是 Electron、pkg/nexe 或 Inno Setup,但会比当前轻量 Web 版复杂一些。
## 注意事项
- 不要把 `data/servers.json` 提交到 GitHub,里面可能包含内部服务器 IP。
- 不要把 SSH 私钥提交到 GitHub。
- 如果同事访问不了网页,优先检查部署机器防火墙是否放行了 `3066` 端口。
[
{
"id": "sample-8-card",
"name": "通信中兴测试机 A",
"host": "10.0.0.12",
"port": 22,
"user": "root",
"command": "hy-smi",
"group": "通信中兴组",
"tags": ["8卡", "公共池"]
},
{
"id": "sample-4-card",
"name": "政府联想回归机 B",
"host": "10.0.0.23",
"port": 22,
"user": "root",
"command": "hy-smi",
"group": "政府联想组",
"tags": ["4卡", "夜间回归"]
}
]
{
"name": "gpu-dcu-server-monitor",
"version": "0.1.0",
"private": true,
"description": "Shared web dashboard for monitoring GPU/DCU server occupancy.",
"type": "commonjs",
"scripts": {
"start": "node server.js"
},
"engines": {
"node": ">=18"
}
}
/**
 * Mutable client-side view state shared by every render and handler function.
 */
const state = {
  // Server records most recently returned by GET /api/servers.
  servers: [],

  // Status filter; set from the clicked sidebar button's data-filter value.
  filter: "all",

  // Group filter; "all" or the name of one server group.
  groupFilter: "all",

  // Trimmed, lower-cased text from the search box.
  query: "",

  // Id of the server shown in the detail panel, or null when none is selected.
  selectedId: null,

  // Delay between automatic reloads; replaced by the value the API reports.
  pollIntervalMs: 10000,

  // Handle of the pending reload timeout, so it can be cancelled.
  timer: null,
};
const els = {
grid: document.querySelector("#serverGrid"),
empty: document.querySelector("#emptyState"),
detail: document.querySelector("#detailPanel"),
groupFilters: document.querySelector("#groupFilters"),
groupOptions: document.querySelector("#groupOptions"),
lastRefresh: document.querySelector("#lastRefresh"),
search: document.querySelector("#searchInput"),
toast: document.querySelector("#toast"),
dialog: document.querySelector("#serverDialog"),
form: document.querySelector("#serverForm"),
dialogTitle: document.querySelector("#dialogTitle"),
deleteBtn: document.querySelector("#deleteServerBtn"),
fields: {
id: document.querySelector("#serverId"),
name: document.querySelector("#serverName"),
host: document.querySelector("#serverHost"),
user: document.querySelector("#serverUser"),
port: document.querySelector("#serverPort"),
command: document.querySelector("#serverCommand"),
group: document.querySelector("#serverGroup"),
tags: document.querySelector("#serverTags")
}
};
// One-time wiring of the static controls, followed by the initial data load.
{
  const onClick = (selector, handler) => {
    document.querySelector(selector).addEventListener("click", handler);
  };

  // Both "add" entry points open the dialog in create mode (no server argument).
  onClick("#addServerBtn", () => openDialog());
  onClick("#emptyAddBtn", () => openDialog());
  onClick("#closeDialogBtn", () => els.dialog.close());
  onClick("#refreshBtn", manualRefresh);

  // Sidebar status filters: remember the choice and move the "active" marker.
  for (const button of document.querySelectorAll(".filter")) {
    button.addEventListener("click", () => {
      state.filter = button.dataset.filter;
      for (const item of document.querySelectorAll(".filter")) {
        item.classList.toggle("active", item === button);
      }
      render();
    });
  }

  // Live search: filter on every keystroke using a normalized query.
  els.search.addEventListener("input", () => {
    state.query = els.search.value.trim().toLowerCase();
    render();
  });

  els.form.addEventListener("submit", saveServer);
  els.deleteBtn.addEventListener("click", deleteSelectedServer);
}

loadServers();
/**
 * Fetches the server list, refreshes the view state and schedules the next poll.
 *
 * Failures are shown as a toast and never rethrown; the next poll is scheduled
 * either way so the dashboard keeps retrying.
 */
async function loadServers() {
  try {
    const payload = await requestJson("/api/servers");

    state.servers = payload.servers || [];
    state.pollIntervalMs = payload.pollIntervalMs || state.pollIntervalMs;

    if (payload.lastRefresh) {
      els.lastRefresh.textContent = `更新 ${formatTime(payload.lastRefresh)}`;
    } else {
      els.lastRefresh.textContent = "等待刷新";
    }

    // Keep a valid selection: default to the first server, and fall back to it
    // (or to nothing) when the previously selected server no longer exists.
    const first = state.servers[0];
    if (!state.selectedId) {
      if (first) state.selectedId = first.id;
    } else if (!state.servers.some((server) => server.id === state.selectedId)) {
      state.selectedId = first?.id || null;
    }

    render();
    scheduleNextLoad();
  } catch (error) {
    showToast(error.message);
    scheduleNextLoad();
  }
}
/**
 * (Re)arms the polling timer: cancels any pending reload and queues a new one
 * after the current poll interval.
 */
function scheduleNextLoad() {
  const { timer, pollIntervalMs } = state;
  window.clearTimeout(timer);
  state.timer = window.setTimeout(loadServers, pollIntervalMs);
}
/**
 * Handler for the "manual refresh" button: asks the backend to refresh via
 * POST /api/refresh, then reloads the list. The button is disabled while the
 * request is in flight and always re-enabled afterwards.
 */
async function manualRefresh() {
  const refreshButton = document.querySelector("#refreshBtn");
  refreshButton.disabled = true;

  try {
    await requestJson("/api/refresh", { method: "POST" });
    await loadServers();
    showToast("刷新完成");
  } catch (failure) {
    showToast(failure.message);
  } finally {
    refreshButton.disabled = false;
  }
}
/**
 * Re-renders every view section from the current state, in a fixed order:
 * stats, group filters, card grid, detail panel.
 */
function render() {
  const steps = [renderStats, renderGroups, renderGrid, renderDetail];
  for (const step of steps) {
    step();
  }
}
/**
 * Renders the group filter buttons and the group suggestions of the server form.
 *
 * If the active group filter no longer matches any existing group it is reset
 * to "all". Both target elements are optional and skipped when absent.
 */
function renderGroups() {
  const summaries = groupSummaries();
  const existingNames = summaries.map((summary) => summary.name);

  if (!existingNames.includes(state.groupFilter)) {
    state.groupFilter = "all";
  }

  const filterBar = els.groupFilters;
  if (filterBar) {
    const buttons = [groupButtonHtml("all", "全部分组", state.servers.length)];
    for (const { name, count } of summaries) {
      buttons.push(groupButtonHtml(name, name, count));
    }
    filterBar.innerHTML = buttons.join("");

    // The buttons are rebuilt on every render, so their listeners are too.
    for (const button of filterBar.querySelectorAll(".group-filter")) {
      button.addEventListener("click", () => {
        state.groupFilter = button.dataset.group;
        render();
      });
    }
  }

  const suggestions = els.groupOptions;
  if (suggestions) {
    // Built-in suggestions first, then any groups actually in use (deduplicated).
    const defaults = ["通信中兴组", "政府联想组", "企业浪潮组", "金融华三组", "深度组", "未分组"];
    const names = new Set([...defaults, ...existingNames]);
    suggestions.innerHTML = Array.from(names, (name) => `<option value="${escapeHtml(name)}"></option>`).join("");
  }
}
/**
 * Counts servers per group.
 *
 * @returns {{name: string, count: number}[]} One entry per group, sorted by
 *   name using zh-CN collation.
 */
function groupSummaries() {
  const counts = new Map();
  state.servers.forEach((server) => {
    const name = serverGroup(server);
    const seen = counts.get(name) || 0;
    counts.set(name, seen + 1);
  });

  const summaries = Array.from(counts, ([name, count]) => ({ name, count }));
  summaries.sort((left, right) => left.name.localeCompare(right.name, "zh-CN"));
  return summaries;
}
/**
 * Builds the markup for one group filter button.
 *
 * @param {string} value - Group key stored in data-group ("all" or a group name).
 * @param {string} label - Visible button text.
 * @param {number} count - Number of servers in the group.
 * @returns {string} HTML for the button, marked "active" when it matches the
 *   current group filter.
 */
function groupButtonHtml(value, label, count) {
  let classes = "group-filter";
  if (state.groupFilter === value) {
    classes += " active";
  }

  const lines = [
    "",
    `<button class="${classes}" data-group="${escapeHtml(value)}" type="button">`,
    `<span>${escapeHtml(label)}</span><strong>${count}</strong>`,
    "</button>",
  ];
  return lines.join("\n");
}
/**
 * Updates the headline counters (servers, cards, free/busy cards) and the
 * per-status server counts shown on the sidebar filters.
 */
function renderStats() {
  let cards = 0;
  let busyCards = 0;
  let freeCards = 0;
  const serversByKind = { free: 0, busy: 0, offline: 0 };

  for (const server of state.servers) {
    const status = server.status || {};
    // Prefer the detected card count, fall back to the configured one.
    cards += status.totalCount || server.gpuCount || 0;
    busyCards += status.busyCount || 0;
    freeCards += status.freeCount || 0;

    // Only free/busy/offline are tallied; other kinds (e.g. pending) are not.
    const kind = getServerKind(server);
    if (kind === "free" || kind === "busy" || kind === "offline") {
      serversByKind[kind] += 1;
    }
  }

  const serverCount = state.servers.length;
  setText("#statServers", serverCount);
  setText("#statCards", cards);
  setText("#statFreeCards", freeCards);
  setText("#statBusyCards", busyCards);
  setText("#countAll", serverCount);
  setText("#countFree", serversByKind.free);
  setText("#countBusy", serversByKind.busy);
  setText("#countOffline", serversByKind.offline);
}
/**
 * Rebuilds the server card grid from the currently filtered server list.
 *
 * The empty-state panel is shown (and the grid hidden) only when no servers
 * are configured at all; a filter that matches nothing leaves an empty grid.
 * Each card is focusable, selects its server on click or Enter/Space, and
 * exposes an edit button that opens the server dialog.
 */
function renderGrid() {
  const servers = filteredServers();
  const hasServers = state.servers.length !== 0;

  els.grid.innerHTML = "";
  els.empty.classList.toggle("hidden", hasServers);
  els.grid.classList.toggle("hidden", !hasServers);

  for (const server of servers) {
    const status = server.status || {};
    const busyPercent = status.totalCount
      ? Math.round(((status.busyCount || 0) / status.totalCount) * 100)
      : 0;

    const card = document.createElement("article");
    card.className = `server-card ${serverOccupancyClass(server)} ${server.id === state.selectedId ? "selected" : ""}`;
    card.tabIndex = 0;
    card.innerHTML = serverCardHtml(server);
    // Exposed to CSS as the filled share of the occupancy donut.
    card.style.setProperty("--busy", `${busyPercent}%`);

    const selectServer = () => {
      state.selectedId = server.id;
      render();
    };

    card.addEventListener("click", selectServer);
    card.addEventListener("keydown", (event) => {
      // Only react to keys pressed on the card itself. Key events bubbling up
      // from the nested edit button must keep their default action, otherwise
      // preventDefault() here would stop the button from being activated by
      // keyboard.
      if (event.target !== card) return;
      if (event.key === "Enter" || event.key === " ") {
        event.preventDefault();
        selectServer();
      }
    });
    card.querySelector(".edit-card").addEventListener("click", (event) => {
      // Editing must not also select the card.
      event.stopPropagation();
      openDialog(server);
    });

    els.grid.appendChild(card);
  }
}
/**
 * Builds the inner HTML of one server card.
 *
 * @param {object} server - Server record; reads name, host, user, port, tags,
 *   gpuCount and the optional status object.
 * @returns {string} Card markup: header with status pill, occupancy donut and
 *   summary, per-GPU chips, and the tag list with the edit button.
 */
function serverCardHtml(server) {
  const status = server.status || {};
  const kind = getServerKind(server);
  const serverLevel = serverOccupancyClass(server);

  // The occupancy colour only applies to reachable servers. Adding a level
  // class to an offline/pending pill would override the dedicated
  // status-offline / status-pending colours in the stylesheet.
  const pillLevel = kind === "free" || kind === "busy" ? ` ${serverLevel}` : "";

  const totalCount = status.totalCount || server.gpuCount || 0;
  // Without explicit tags, show the card count (or a placeholder) instead.
  const fallbackTags = [totalCount ? `${totalCount}卡` : "自动识别"];
  const tags = [...new Set([serverGroup(server), ...(server.tags?.length ? server.tags : fallbackTags)])];

  const login = server.user ? `${server.user}@${server.host}` : server.host;
  const occupancy = totalCount ? `${status.busyCount || 0}/${totalCount}` : "识别中";

  return `
    <div class="card-head">
      <div>
        <div class="server-name">${escapeHtml(server.name)}</div>
        <div class="server-host">${escapeHtml(`${login}:${server.port}`)}</div>
        <div class="server-model">${escapeHtml(modelSummary(server))}</div>
      </div>
      <span class="status-pill status-${kind}${pillLevel}">${kindLabel(kind)}</span>
    </div>
    <div class="gpu-ring">
      <div class="donut ${serverLevel}"><span>${escapeHtml(occupancy)}</span></div>
      <div class="summary">
        <strong>${escapeHtml(status.summary || "等待刷新")}</strong>
        <span>${status.updatedAt ? formatTime(status.updatedAt) : "未采集"}</span>
      </div>
    </div>
    <div class="gpu-grid">
      ${gpuChips(status.gpus || [], totalCount, kind)}
    </div>
    <div class="tag-list">
      ${tags.map((tag) => `<span class="tag">${escapeHtml(tag)}</span>`).join("")}
      <button class="icon-button edit-card" type="button" aria-label="编辑服务器">✎</button>
    </div>
  `;
}
/**
 * Builds the compact per-GPU chips shown on a server card.
 *
 * @param {object[]} gpus - GPU records; when empty, `count` placeholder chips
 *   with state "unknown" are generated instead.
 * @param {number} count - Maximum number of chips to render.
 * @param {string} serverKind - Server status kind; "offline" forces every chip
 *   into the offline style.
 * @returns {string} Concatenated chip markup.
 */
function gpuChips(gpus, count, serverKind) {
  const list = gpus.length ? gpus : Array.from({ length: count }, (_, index) => ({ index, state: "unknown" }));

  return list
    .slice(0, count)
    .map((gpu) => {
      const cls = serverKind === "offline" ? "offline" : gpu.state || "unknown";
      // Offline/unknown chips carry no occupancy level: the stylesheet's
      // level-* chip rules would otherwise override their neutral grey look
      // and make them appear as free (green) cards.
      const chipLevel = cls === "offline" || cls === "unknown" ? "" : ` ${gpuOccupancyClass(gpu)}`;

      const compute = formatPercent(gpu.utilization);
      const vram = formatPercent(gpu.memoryUtilization);
      // --level drives the height of the bottom-up fill inside each block.
      const computeLevel = normalizePercent(gpu.utilization);
      const vramLevel = normalizePercent(gpu.memoryUtilization);
      const computeClass = occupancyClass(gpu.utilization);
      const vramClass = occupancyClass(gpu.memoryUtilization);

      return `
        <span class="gpu-chip ${escapeHtml(cls)}${chipLevel}">
          <b>#${escapeHtml(gpu.index)}</b>
          <span class="chip-metrics">
            <span class="chip-block memory ${vramClass}" style="--level:${vramLevel}%">
              <i></i>
              <em>显存</em>
              <strong>${escapeHtml(vram)}</strong>
            </span>
            <span class="chip-block compute ${computeClass}" style="--level:${computeLevel}%">
              <i></i>
              <em>算力</em>
              <strong>${escapeHtml(compute)}</strong>
            </span>
          </span>
        </span>`;
    })
    .join("");
}
/**
 * Renders the detail panel for the selected server, or a placeholder when no
 * server is selected. All values taken from the server record are escaped
 * before being written into the panel's HTML.
 */
function renderDetail() {
  const server = state.servers.find((item) => item.id === state.selectedId);

  if (!server) {
    els.detail.innerHTML = `
      <div class="detail-empty">
        <div class="detail-pulse"></div>
        <h3>选择一台服务器</h3>
        <p>查看每张 GPU/DCU 卡的占用、显存、温度和连接状态。</p>
      </div>`;
    return;
  }

  const status = server.status || {};
  const kind = getServerKind(server);
  const totalCount = status.totalCount || server.gpuCount || 0;
  const occupancy = totalCount ? `${status.busyCount || 0}/${totalCount}` : "识别中";
  const latency = status.latencyMs ? `${status.latencyMs}ms` : "-";

  els.detail.innerHTML = `
    <div class="detail-head">
      <div>
        <p class="eyebrow">${totalCount ? `${escapeHtml(totalCount)}卡服务器` : "自动识别卡数"} · ${commandLabel(server.command)}</p>
        <h3>${escapeHtml(server.name)}</h3>
      </div>
      <button class="icon-button" id="detailEditBtn" type="button" aria-label="编辑服务器">✎</button>
    </div>
    <div class="detail-meta">
      <div class="meta-box"><span>状态</span><strong>${kindLabel(kind)}</strong></div>
      <div class="meta-box"><span>占用</span><strong>${escapeHtml(occupancy)}</strong></div>
      <div class="meta-box"><span>分组</span><strong>${escapeHtml(serverGroup(server))}</strong></div>
      <div class="meta-box"><span>地址</span><strong>${escapeHtml(`${server.host}:${server.port}`)}</strong></div>
      <div class="meta-box"><span>型号</span><strong>${escapeHtml(modelSummary(server))}</strong></div>
      <div class="meta-box"><span>延迟</span><strong>${escapeHtml(latency)}</strong></div>
    </div>
    ${status.error ? `<div class="meta-box"><span>错误</span><strong>${escapeHtml(status.error)}</strong></div>` : ""}
    <div class="gpu-list">
      ${(status.gpus || []).map(gpuRowHtml).join("")}
    </div>
  `;

  // The edit button is recreated with the markup above, so bind it afresh.
  document.querySelector("#detailEditBtn").addEventListener("click", () => openDialog(server));
}
/**
 * Builds the detail-panel row for one GPU: title, state, memory/compute bars
 * and the memory, temperature and power read-outs.
 *
 * Every value taken from the GPU record is escaped before being placed in
 * the markup, matching how the card chips treat the same data.
 *
 * @param {object} gpu - GPU record; reads index, model, state, utilization,
 *   memoryUtilization, memoryUsedMiB, memoryTotalMiB, temperatureC and powerW.
 * @returns {string} Row markup.
 */
function gpuRowHtml(gpu) {
  const utilization = normalizePercent(gpu.utilization);
  const memoryUtilization = normalizePercent(gpu.memoryUtilization);
  const utilizationClass = occupancyClass(gpu.utilization);
  const memoryUtilizationClass = occupancyClass(gpu.memoryUtilization);

  // Prefer absolute memory figures; fall back to the percentage, then to "-".
  let memory = "-";
  if (gpu.memoryTotalMiB) {
    memory = `${gpu.memoryUsedMiB || 0}/${gpu.memoryTotalMiB} MiB`;
  } else if (gpu.memoryUtilization !== null && gpu.memoryUtilization !== undefined) {
    memory = `${gpu.memoryUtilization}%`;
  }

  const title = `卡 #${escapeHtml(gpu.index)}${gpu.model ? ` · ${escapeHtml(gpu.model)}` : ""}`;

  return `
    <div class="gpu-row">
      <div class="gpu-row-head">
        <strong>${title}</strong>
        <span>${gpuStateLabel(gpu.state)}</span>
      </div>
      <div class="bar-stack">
        <div class="metric-line">
          <span>显存</span>
          <div class="bar memory"><i class="${memoryUtilizationClass}" style="width:${memoryUtilization}%"></i></div>
          <strong>${formatPercent(gpu.memoryUtilization)}</strong>
        </div>
        <div class="metric-line">
          <span>算力</span>
          <div class="bar compute"><i class="${utilizationClass}" style="width:${utilization}%"></i></div>
          <strong>${formatPercent(gpu.utilization)}</strong>
        </div>
      </div>
      <div class="gpu-metrics">
        <span>显存 ${escapeHtml(memory)}</span>
        <span>温度 ${escapeHtml(gpu.temperatureC ?? "-")}</span>
        <span>功耗 ${escapeHtml(gpu.powerW ?? "-")}W</span>
      </div>
    </div>`;
}
/**
 * Coerces a value to a percentage in the range 0-100.
 *
 * @param {*} value - Anything convertible with Number().
 * @returns {number} The clamped number, or 0 when the value is not finite.
 */
function normalizePercent(value) {
  const numeric = Number(value);
  return Number.isFinite(numeric) ? Math.min(Math.max(numeric, 0), 100) : 0;
}
/**
 * Formats a value as a percentage label.
 *
 * @param {*} value - Raw utilization value.
 * @returns {string} "-" for missing, empty or non-numeric input; otherwise the
 *   number followed by "%", with one decimal place for non-integers.
 */
function formatPercent(value) {
  const isMissing = value === null || value === undefined || value === "";
  const numeric = Number(value);
  if (isMissing || !Number.isFinite(numeric)) {
    return "-";
  }

  const digits = Number.isInteger(numeric) ? numeric : numeric.toFixed(1);
  return `${digits}%`;
}
/**
 * Maps a utilization value to its colour-level CSS class.
 *
 * @param {*} value - Raw utilization; normalized to 0-100 first.
 * @returns {string} "level-critical" (>= 80), "level-warning" (>= 40),
 *   "level-low" (>= 10) or "level-free".
 */
function occupancyClass(value) {
  // Ordered from the highest threshold down; the first match wins.
  const tiers = [
    [80, "level-critical"],
    [40, "level-warning"],
    [10, "level-low"],
  ];
  const percent = normalizePercent(value);
  const match = tiers.find(([threshold]) => percent >= threshold);
  return match ? match[1] : "level-free";
}
/**
 * Colour-level class for a single GPU, driven by whichever of memory or
 * compute utilization is higher.
 *
 * @param {object} gpu - GPU record with memoryUtilization and utilization.
 * @returns {string} The level class for the higher of the two values.
 */
function gpuOccupancyClass(gpu) {
  const memoryLoad = normalizePercent(gpu.memoryUtilization);
  const computeLoad = normalizePercent(gpu.utilization);
  const peak = memoryLoad > computeLoad ? memoryLoad : computeLoad;
  return occupancyClass(peak);
}
/**
 * Colour-level class for a whole server: the level of its most heavily used
 * GPU, considering both memory and compute utilization.
 *
 * @param {object} server - Server record; reads status.gpus.
 * @returns {string} "level-free" when no GPU data is available, otherwise the
 *   class for the highest utilization found.
 */
function serverOccupancyClass(server) {
  const gpus = server.status?.gpus || [];
  if (gpus.length === 0) {
    return "level-free";
  }

  let peak = 0;
  for (const gpu of gpus) {
    peak = Math.max(peak, normalizePercent(gpu.memoryUtilization), normalizePercent(gpu.utilization));
  }
  return occupancyClass(peak);
}
/**
 * Returns the servers that pass the status filter, the group filter and the
 * free-text query.
 *
 * The query is matched as a substring of the lower-cased name, host, user,
 * group, model summary and tags.
 *
 * @returns {object[]} Matching servers, in their original order.
 */
function filteredServers() {
  const { filter, groupFilter, query, servers } = state;

  const matches = (server) => {
    const group = serverGroup(server);
    if (filter !== "all" && filter !== getServerKind(server)) return false;
    if (groupFilter !== "all" && group !== groupFilter) return false;

    const haystack = [server.name, server.host, server.user, group, modelSummary(server), ...(server.tags || [])]
      .join(" ")
      .toLowerCase();
    return !query || haystack.includes(query);
  };

  return servers.filter(matches);
}
/**
 * Returns the display group of a server.
 *
 * @param {object} server - Server record; reads group.
 * @returns {string} The trimmed group name, or "未分组" when it is missing or
 *   blank.
 */
function serverGroup(server) {
  const fallback = "未分组";
  const label = String(server.group || fallback).trim();
  return label === "" ? fallback : label;
}
/**
 * Classifies a server for filtering and display.
 *
 * @param {object} server - Server record; reads status.state and
 *   status.busyCount.
 * @returns {"offline"|"pending"|"busy"|"free"} The reported offline/pending
 *   state if any; otherwise "busy" when at least one card is busy, else "free".
 */
function getServerKind(server) {
  const { state: reported, busyCount } = server.status || {};
  if (reported === "offline" || reported === "pending") {
    return reported;
  }
  return (busyCount || 0) > 0 ? "busy" : "free";
}
/**
 * Returns the display label for a server status kind.
 *
 * A Map is used instead of a plain object lookup so that names inherited from
 * Object.prototype (such as "constructor" or "toString") fall through to the
 * fallback label rather than returning a built-in function.
 *
 * @param {string} kind - "free", "busy", "offline" or "pending".
 * @returns {string} The label, or "未知" for any other value.
 */
function kindLabel(kind) {
  const labels = new Map([
    ["free", "空闲"],
    ["busy", "占用"],
    ["offline", "离线"],
    ["pending", "刷新中"],
  ]);
  return labels.get(kind) || "未知";
}
/**
 * Returns the display label for a single GPU's state.
 *
 * The state comes from collected data, so a Map is used instead of a plain
 * object lookup: names inherited from Object.prototype (such as "constructor"
 * or "toString") then yield the fallback label rather than a built-in
 * function.
 *
 * @param {string} kind - "free", "busy", "offline" or "unknown".
 * @returns {string} The label, or "未知" for any other value.
 */
function gpuStateLabel(kind) {
  const labels = new Map([
    ["free", "空闲"],
    ["busy", "占用"],
    ["offline", "离线"],
    ["unknown", "未知"],
  ]);
  return labels.get(kind) || "未知";
}
/**
 * Returns the accelerator family label for a collection command.
 *
 * @param {string} command - Collection command configured for the server.
 * @returns {string} "NVIDIA GPU" for "nvidia-smi"; "海光 DCU" for anything else.
 */
function commandLabel(command) {
  if (command === "nvidia-smi") {
    return "NVIDIA GPU";
  }
  return "海光 DCU";
}
/**
 * Summarizes the card models of a server.
 *
 * Uses status.models when it is non-empty, otherwise the non-empty model
 * fields of status.gpus.
 *
 * @param {object} server - Server record; reads status.models and status.gpus.
 * @returns {string} "型号识别中" when no model is known, the single model, or all
 *   distinct models joined with " / ".
 */
function modelSummary(server) {
  const status = server.status;

  let models = status?.models;
  if (!models?.length) {
    models = (status?.gpus || []).map((gpu) => gpu.model).filter(Boolean);
  }

  const distinct = Array.from(new Set(models));
  switch (distinct.length) {
    case 0:
      return "型号识别中";
    case 1:
      return distinct[0];
    default:
      return distinct.join(" / ");
  }
}
/**
 * Opens the server dialog, in edit mode when a server is given and in create
 * mode otherwise.
 *
 * @param {object} [server] - Server to edit; omitted to add a new one. Missing
 *   fields fall back to the form defaults (user "root", port 22, "hy-smi").
 */
function openDialog(server) {
  const isEdit = Boolean(server);
  els.dialogTitle.textContent = isEdit ? "编辑服务器" : "添加服务器";
  // Deleting only makes sense for an existing server.
  els.deleteBtn.classList.toggle("hidden", !isEdit);

  const initialValues = {
    id: server?.id || "",
    name: server?.name || "",
    host: server?.host || "",
    user: server?.user || "root",
    port: server?.port || 22,
    command: server?.command || "hy-smi",
    group: server?.group || "",
    tags: (server?.tags || []).join(", "),
  };
  for (const [field, value] of Object.entries(initialValues)) {
    els.fields[field].value = value;
  }

  els.dialog.showModal();
  els.fields.name.focus();
}
/**
 * Submit handler of the server form: creates a server (POST) or updates the
 * one being edited (PATCH), selects it, closes the dialog and reloads the list.
 * Errors are reported as a toast and leave the dialog open.
 *
 * @param {Event} event - The form's submit event; its default action is
 *   cancelled.
 */
async function saveServer(event) {
  event.preventDefault();

  const { fields } = els;
  const editingId = fields.id.value;
  const body = {
    name: fields.name.value,
    host: fields.host.value,
    user: fields.user.value,
    port: Number(fields.port.value || 22),
    command: fields.command.value || "hy-smi",
    group: fields.group.value,
    // Tags are sent as the raw input text.
    tags: fields.tags.value,
  };

  try {
    let endpoint = "/api/servers";
    let method = "POST";
    if (editingId) {
      endpoint = `/api/servers/${encodeURIComponent(editingId)}`;
      method = "PATCH";
    }

    const payload = await requestJson(endpoint, {
      method,
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });

    state.selectedId = payload.server.id;
    els.dialog.close();
    await loadServers();
    showToast("已保存");
  } catch (failure) {
    showToast(failure.message);
  }
}
/**
 * Deletes the server currently loaded in the dialog, then clears the
 * selection, closes the dialog and reloads the list. Does nothing when the
 * dialog holds no server id; errors are reported as a toast.
 */
async function deleteSelectedServer() {
  const targetId = els.fields.id.value;
  if (targetId) {
    try {
      const endpoint = `/api/servers/${encodeURIComponent(targetId)}`;
      await requestJson(endpoint, { method: "DELETE" });
      state.selectedId = null;
      els.dialog.close();
      await loadServers();
      showToast("已删除");
    } catch (failure) {
      showToast(failure.message);
    }
  }
}
/**
 * Performs a fetch and returns the parsed JSON body.
 *
 * @param {string} url - Request URL.
 * @param {object} [options] - Options passed straight to fetch().
 * @returns {Promise<*>} The parsed body, or {} when the body is not valid JSON.
 * @throws {Error} For non-2xx responses, with the body's `error` text or a
 *   generic message containing the status code.
 */
async function requestJson(url, options) {
  const response = await fetch(url, options);
  // An unparsable body is treated as an empty object rather than a failure.
  const payload = await response.json().then(
    (data) => data,
    () => ({})
  );

  if (response.ok) {
    return payload;
  }
  throw new Error(payload.error || `请求失败 ${response.status}`);
}
/**
 * Shows a toast message and hides it again after 2.6 seconds. A toast that is
 * already visible has its pending hide timer cancelled and restarted.
 *
 * @param {string} message - Text to display.
 */
function showToast(message) {
  const { toast } = els;
  toast.textContent = message;
  toast.classList.remove("hidden");

  // The timer handle is kept on `els` so the next call can cancel it.
  window.clearTimeout(els.toastTimer);
  const hide = () => toast.classList.add("hidden");
  els.toastTimer = window.setTimeout(hide, 2600);
}
/**
 * Sets the text content of the first element matching a selector.
 *
 * @param {string} selector - CSS selector of the target element.
 * @param {*} value - Value assigned to textContent.
 */
function setText(selector, value) {
  const target = document.querySelector(selector);
  target.textContent = value;
}
/**
 * Formats a timestamp as a 24-hour time of day in the zh-CN locale.
 *
 * @param {*} value - Anything accepted by the Date constructor.
 * @returns {string} The localized time string.
 */
function formatTime(value) {
  const formatOptions = { hour12: false };
  const moment = new Date(value);
  return moment.toLocaleTimeString("zh-CN", formatOptions);
}
/**
 * Escapes a value for safe insertion into HTML text or attribute values.
 *
 * @param {*} value - Value to escape; null and undefined become "".
 * @returns {string} The string form with &, <, >, " and ' replaced by
 *   entities.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;",
  };
  // Single pass, so the ampersands of inserted entities are never re-escaped.
  return String(value ?? "").replace(/[&<>"']/g, (character) => entities[character]);
}
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>GPU/DCU 资源看板</title>
<link rel="stylesheet" href="/styles.css" />
</head>
<body>
<div class="shell">
<aside class="sidebar">
<div class="brand">
<div class="brand-mark">G</div>
<div>
<h1>GPU/DCU 资源看板</h1>
<p id="lastRefresh">等待刷新</p>
</div>
</div>
<div class="filters" role="tablist" aria-label="服务器筛选">
<button class="filter active" data-filter="all" type="button">
<span>全部</span><strong id="countAll">0</strong>
</button>
<button class="filter" data-filter="free" type="button">
<span>空闲</span><strong id="countFree">0</strong>
</button>
<button class="filter" data-filter="busy" type="button">
<span>占用</span><strong id="countBusy">0</strong>
</button>
<button class="filter" data-filter="offline" type="button">
<span>离线</span><strong id="countOffline">0</strong>
</button>
</div>
<div class="sidebar-actions">
<button class="primary-action" id="addServerBtn" type="button">
<span aria-hidden="true"></span>添加服务器
</button>
<button class="ghost-action" id="refreshBtn" type="button">
<span aria-hidden="true"></span>手动刷新
</button>
</div>
</aside>
<main class="content">
<section class="topline">
<div>
<p class="eyebrow">共享测试资源</p>
<h2>服务器占用情况</h2>
</div>
<div class="search-wrap">
<span aria-hidden="true"></span>
<input id="searchInput" type="search" placeholder="搜索服务器、IP、标签" />
</div>
</section>
<section class="stats" aria-label="资源统计">
<div class="stat">
<span>服务器</span>
<strong id="statServers">0</strong>
</div>
<div class="stat">
<span>总卡数</span>
<strong id="statCards">0</strong>
</div>
<div class="stat good">
<span>空闲卡</span>
<strong id="statFreeCards">0</strong>
</div>
<div class="stat warn">
<span>占用卡</span>
<strong id="statBusyCards">0</strong>
</div>
</section>
<section class="group-panel" id="groupFilters" aria-label="服务器分组"></section>
<section class="server-grid" id="serverGrid" aria-live="polite"></section>
<section class="empty-state hidden" id="emptyState">
<div class="empty-icon"></div>
<h3>添加第一台服务器</h3>
<p>配置 SSH 登录信息后,看板会定时采集显存和算力占用。</p>
<button class="primary-action compact" id="emptyAddBtn" type="button">添加服务器</button>
</section>
</main>
<aside class="detail" id="detailPanel">
<div class="detail-empty">
<div class="detail-pulse"></div>
<h3>选择一台服务器</h3>
<p>查看每张加速卡的占用、显存、温度和连接状态。</p>
</div>
</aside>
</div>
<dialog id="serverDialog" class="server-dialog">
<form id="serverForm" method="dialog">
<div class="dialog-head">
<div>
<p class="eyebrow">服务器配置</p>
<h3 id="dialogTitle">添加服务器</h3>
</div>
<button class="icon-button" id="closeDialogBtn" type="button" aria-label="关闭">×</button>
</div>
<input id="serverId" type="hidden" />
<label>
<span>名称</span>
<input id="serverName" autocomplete="off" required placeholder="例如:算法公共机 A" />
</label>
<label>
<span>Host / IP</span>
<input id="serverHost" autocomplete="off" required placeholder="10.0.0.12" />
</label>
<div class="field-row">
<label>
<span>SSH 用户</span>
<input id="serverUser" autocomplete="username" value="root" placeholder="root" />
</label>
<label>
<span>端口</span>
<input id="serverPort" type="number" min="1" max="65535" value="22" />
</label>
</div>
<label>
<span>分组</span>
<input id="serverGroup" list="groupOptions" autocomplete="off" placeholder="通信中兴组" />
<datalist id="groupOptions"></datalist>
</label>
<label>
<span>标签</span>
<input id="serverTags" placeholder="公共池, 回归" />
</label>
<label>
<span>采集命令</span>
<select id="serverCommand">
<option value="hy-smi">hy-smi(海光 DCU)</option>
<option value="nvidia-smi">nvidia-smi(NVIDIA GPU)</option>
</select>
</label>
<div class="dialog-actions">
<button class="ghost-action compact" id="deleteServerBtn" type="button">删除</button>
<button class="primary-action compact" type="submit">保存</button>
</div>
</form>
</dialog>
<div class="toast hidden" id="toast"></div>
<script src="/app.js"></script>
</body>
</html>
:root {
color-scheme: light;
--bg: #eef3f6;
--panel: #ffffff;
--panel-soft: #f7fafb;
--text: #17212b;
--muted: #687684;
--line: #dbe4e9;
--teal: #0f9f9a;
--teal-dark: #087873;
--green: #19a35b;
--amber: #d28411;
--red: #d84949;
--blue: #2c75d6;
--shadow: 0 20px 60px rgba(30, 51, 61, 0.12);
font-family: Inter, "Segoe UI", "Microsoft YaHei", system-ui, sans-serif;
}
* {
box-sizing: border-box;
}
html,
body {
width: 100%;
height: 100%;
}
body {
margin: 0;
background:
linear-gradient(135deg, rgba(15, 159, 154, 0.08), transparent 34%),
linear-gradient(315deg, rgba(44, 117, 214, 0.08), transparent 32%),
var(--bg);
color: var(--text);
overflow: hidden;
}
button,
input,
select {
font: inherit;
}
button {
cursor: pointer;
}
.shell {
display: grid;
grid-template-columns: 252px minmax(560px, 1fr) 340px;
gap: 12px;
width: 100%;
height: 100dvh;
min-height: 0;
padding: 12px;
}
.sidebar,
.content,
.detail {
background: rgba(255, 255, 255, 0.86);
border: 1px solid rgba(219, 228, 233, 0.86);
box-shadow: var(--shadow);
backdrop-filter: blur(18px);
}
.sidebar {
display: flex;
flex-direction: column;
gap: 18px;
min-height: 0;
border-radius: 18px;
padding: 16px;
}
.brand {
display: flex;
align-items: center;
gap: 12px;
}
.brand-mark {
display: grid;
place-items: center;
width: 42px;
height: 42px;
border-radius: 12px;
background: var(--text);
color: white;
font-weight: 800;
}
h1,
h2,
h3,
p {
margin: 0;
}
h1 {
font-size: 18px;
}
h2 {
margin-top: 4px;
font-size: 30px;
letter-spacing: 0;
}
.brand p,
.detail-empty p,
.empty-state p {
margin-top: 4px;
color: var(--muted);
font-size: 13px;
}
.filters {
display: grid;
gap: 8px;
}
.filter {
display: flex;
align-items: center;
justify-content: space-between;
min-height: 44px;
border: 1px solid transparent;
border-radius: 10px;
background: transparent;
color: var(--muted);
padding: 0 12px;
}
.filter.active,
.filter:hover {
border-color: var(--line);
background: var(--panel-soft);
color: var(--text);
}
.filter strong {
color: inherit;
}
.sidebar-actions {
display: grid;
gap: 10px;
margin-top: auto;
}
.primary-action,
.ghost-action {
display: inline-flex;
align-items: center;
justify-content: center;
gap: 8px;
min-height: 44px;
border-radius: 10px;
border: 1px solid transparent;
padding: 0 14px;
font-weight: 700;
}
.primary-action {
background: var(--teal);
color: #fff;
}
.primary-action:hover {
background: var(--teal-dark);
}
.ghost-action {
background: var(--panel-soft);
border-color: var(--line);
color: var(--text);
}
.compact {
min-height: 38px;
}
.content {
display: flex;
flex-direction: column;
min-height: 0;
border-radius: 18px;
padding: 18px;
overflow: hidden;
}
.topline {
display: flex;
align-items: center;
justify-content: space-between;
gap: 18px;
flex: 0 0 auto;
}
.eyebrow {
color: var(--teal-dark);
font-size: 12px;
font-weight: 800;
}
.search-wrap {
display: flex;
align-items: center;
gap: 8px;
width: min(360px, 100%);
min-height: 42px;
border: 1px solid var(--line);
border-radius: 10px;
background: #fff;
padding: 0 12px;
color: var(--muted);
}
.search-wrap input {
width: 100%;
border: 0;
outline: 0;
background: transparent;
color: var(--text);
}
.stats {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 10px;
margin: 14px 0;
flex: 0 0 auto;
}
.group-panel {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin: 0 0 12px;
flex: 0 0 auto;
}
.group-filter {
display: inline-flex;
align-items: center;
gap: 8px;
min-height: 34px;
border: 1px solid var(--line);
border-radius: 999px;
background: #fff;
color: var(--muted);
padding: 0 12px;
font-size: 13px;
font-weight: 800;
}
.group-filter.active,
.group-filter:hover {
border-color: rgba(15, 159, 154, 0.48);
background: #e9f7f5;
color: var(--teal-dark);
}
.group-filter strong {
min-width: 18px;
border-radius: 999px;
background: rgba(15, 159, 154, 0.12);
color: inherit;
padding: 2px 6px;
text-align: center;
font-size: 12px;
}
.stat {
min-height: 74px;
border: 1px solid var(--line);
border-radius: 12px;
background: var(--panel-soft);
padding: 12px;
}
.stat span {
color: var(--muted);
font-size: 13px;
}
.stat strong {
display: block;
margin-top: 4px;
font-size: 25px;
}
.stat.good strong {
color: var(--green);
}
.stat.warn strong {
color: var(--amber);
}
.server-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
gap: 12px;
align-items: start;
flex: 1 1 auto;
min-height: 0;
overflow-y: auto;
padding: 2px 4px 12px 2px;
}
.server-card {
min-height: 0;
min-width: 0;
border: 1px solid var(--line);
border-radius: 12px;
background: #fff;
padding: 13px;
transition: transform 0.16s ease, box-shadow 0.16s ease, border-color 0.16s ease;
}
.server-card:hover,
.server-card.selected {
transform: translateY(-2px);
border-color: rgba(15, 159, 154, 0.45);
box-shadow: 0 18px 40px rgba(30, 51, 61, 0.12);
}
.card-head {
display: flex;
align-items: start;
justify-content: space-between;
gap: 12px;
}
.server-name {
font-size: 17px;
font-weight: 800;
word-break: break-word;
}
.server-host {
margin-top: 4px;
color: var(--muted);
font-size: 12px;
}
.server-model {
margin-top: 4px;
color: var(--teal-dark);
font-size: 12px;
font-weight: 800;
overflow-wrap: anywhere;
}
.status-pill {
flex: 0 0 auto;
border-radius: 999px;
padding: 5px 10px;
color: #fff;
font-size: 12px;
font-weight: 800;
}
.status-pill.level-free,
.status-pill.level-low,
.status-pill.level-warning,
.status-pill.level-critical {
background: var(--level-color);
}
.status-free {
background: var(--green);
}
.status-busy {
background: var(--amber);
}
.status-offline {
background: var(--red);
}
.status-pending {
background: var(--blue);
}
.gpu-ring {
display: grid;
grid-template-columns: 72px 1fr;
align-items: center;
gap: 12px;
margin: 12px 0;
}
.donut {
position: relative;
display: grid;
place-items: center;
width: 72px;
height: 72px;
border-radius: 50%;
background: conic-gradient(var(--level-color, var(--green)) var(--busy, 0%), #dfe8ec 0);
}
.donut::before {
content: "";
position: absolute;
width: 50px;
height: 50px;
border-radius: 50%;
background: #fff;
}
.donut span {
position: relative;
z-index: 1;
font-weight: 900;
}
.summary strong {
display: block;
font-size: 21px;
}
.summary span {
color: var(--muted);
font-size: 12px;
}
.gpu-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(58px, 1fr));
gap: 6px;
}
.gpu-chip {
display: grid;
grid-template-rows: 16px 1fr;
gap: 4px;
min-height: 70px;
border: 1px solid #cfe7dd;
border-radius: 8px;
padding: 4px;
font-size: 10px;
font-weight: 800;
background: #e8f5ef;
color: #0c7040;
line-height: 1.2;
}
.gpu-chip b {
color: currentColor;
font-size: 11px;
font-weight: 800;
text-align: center;
}
.gpu-chip.offline,
.gpu-chip.unknown {
background: #edf2f5;
border-color: #d7e0e5;
color: #687684;
}
.gpu-chip.level-free {
background: #e8f5ef;
border-color: #bde4cf;
color: #0c7040;
}
.gpu-chip.level-low {
background: #f1f7dc;
border-color: #d8e899;
color: #617414;
}
.gpu-chip.level-warning {
background: #fff2da;
border-color: #f2cf93;
color: #9b5d08;
}
.gpu-chip.level-critical {
background: #fde8e8;
border-color: #f3b2b2;
color: #9f2f2f;
}
.chip-metrics {
display: grid;
grid-template-columns: repeat(2, minmax(24px, 1fr));
gap: 4px;
min-width: 0;
}
.chip-block {
position: relative;
display: grid;
align-content: center;
justify-items: center;
min-width: 0;
min-height: 44px;
overflow: hidden;
border-radius: 6px;
background: rgba(255, 255, 255, 0.78);
color: var(--text);
}
.chip-block i {
position: absolute;
inset: auto 0 0 0;
width: 100%;
height: var(--level);
opacity: 0.86;
}
.chip-block.memory i {
background: var(--level-color, #19a35b);
}
.chip-block.compute i {
background: var(--level-color, #19a35b);
}
.level-free {
--level-color: #19a35b;
}
.level-low {
--level-color: #a9c83d;
}
.level-warning {
--level-color: #e39a24;
}
.level-critical {
--level-color: #d84949;
}
.chip-block em,
.chip-block strong {
position: relative;
z-index: 1;
font-style: normal;
}
.chip-block em {
font-size: 9px;
color: rgba(23, 33, 43, 0.74);
}
.chip-block strong {
font-size: 10px;
color: var(--text);
}
.tag-list {
display: flex;
flex-wrap: wrap;
gap: 6px;
margin-top: 14px;
}
.tag {
border-radius: 999px;
background: #eef6f6;
color: var(--teal-dark);
padding: 4px 8px;
font-size: 12px;
font-weight: 700;
}
.detail {
min-height: 0;
border-radius: 18px;
padding: 16px;
overflow-y: auto;
}
.detail-empty {
display: grid;
place-items: center;
align-content: center;
min-height: calc(100dvh - 56px);
text-align: center;
}
.detail-pulse,
.empty-icon {
display: grid;
place-items: center;
width: 58px;
height: 58px;
border-radius: 16px;
margin-bottom: 14px;
background: #e7f6f4;
color: var(--teal-dark);
}
.detail-pulse::before {
content: "";
width: 24px;
height: 24px;
border-radius: 50%;
background: var(--teal);
}
.detail-head {
display: flex;
align-items: start;
justify-content: space-between;
gap: 12px;
margin-bottom: 16px;
}
.detail-head h3 {
font-size: 22px;
}
.icon-button {
display: grid;
place-items: center;
width: 36px;
height: 36px;
border: 1px solid var(--line);
border-radius: 10px;
background: #fff;
color: var(--text);
font-size: 20px;
}
.detail-meta {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 10px;
margin-bottom: 16px;
}
.meta-box,
.gpu-row {
border: 1px solid var(--line);
border-radius: 10px;
background: var(--panel-soft);
padding: 12px;
}
.meta-box span,
.gpu-row span {
color: var(--muted);
font-size: 12px;
}
.meta-box strong {
display: block;
margin-top: 4px;
overflow-wrap: anywhere;
}
.gpu-list {
display: grid;
gap: 10px;
}
.gpu-row {
background: #fff;
}
.gpu-row-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
margin-bottom: 9px;
}
.bar {
height: 9px;
border-radius: 999px;
background: #e4edf1;
overflow: hidden;
}
.bar > i {
display: block;
width: 0;
height: 100%;
border-radius: inherit;
background: var(--level-color, #19a35b);
}
.bar.memory > i {
background: var(--level-color, #19a35b);
}
.bar.compute > i {
background: var(--level-color, #19a35b);
}
.bar-stack {
display: grid;
gap: 8px;
}
.metric-line {
display: grid;
grid-template-columns: 34px minmax(0, 1fr) 44px;
align-items: center;
gap: 8px;
}
.metric-line strong {
color: var(--text);
font-size: 12px;
text-align: right;
}
.gpu-metrics {
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr));
gap: 8px;
margin-top: 9px;
font-size: 12px;
}
.empty-state {
display: grid;
place-items: center;
min-height: 360px;
border: 1px dashed #b8c9d1;
border-radius: 14px;
background: rgba(255, 255, 255, 0.55);
text-align: center;
}
.empty-state .primary-action {
margin-top: 14px;
}
.hidden {
display: none !important;
}
.server-dialog {
width: min(540px, calc(100vw - 28px));
border: 1px solid var(--line);
border-radius: 16px;
box-shadow: var(--shadow);
padding: 0;
}
.server-dialog::backdrop {
background: rgba(23, 33, 43, 0.32);
backdrop-filter: blur(4px);
}
.server-dialog form {
display: grid;
gap: 14px;
padding: 20px;
}
.dialog-head,
.dialog-actions,
.field-row {
display: flex;
gap: 12px;
}
.dialog-head,
.dialog-actions {
align-items: center;
justify-content: space-between;
}
label {
display: grid;
gap: 6px;
width: 100%;
color: var(--muted);
font-size: 13px;
font-weight: 700;
}
input,
select {
width: 100%;
min-height: 40px;
border: 1px solid var(--line);
border-radius: 9px;
outline: 0;
background: #fff;
color: var(--text);
padding: 0 11px;
}
input:focus,
select:focus {
border-color: rgba(15, 159, 154, 0.72);
box-shadow: 0 0 0 3px rgba(15, 159, 154, 0.12);
}
.toast {
position: fixed;
right: 22px;
bottom: 22px;
max-width: min(420px, calc(100vw - 44px));
border-radius: 12px;
background: var(--text);
color: #fff;
padding: 12px 14px;
box-shadow: var(--shadow);
font-size: 14px;
}
@media (max-width: 1500px) {
body {
overflow: auto;
}
.shell {
grid-template-columns: 240px minmax(560px, 1fr);
height: auto;
min-height: 100dvh;
}
.content {
min-height: calc(100dvh - 24px);
}
.detail {
grid-column: 1 / -1;
max-height: none;
}
.detail-empty {
min-height: 220px;
}
}
@media (max-width: 900px) {
body {
overflow: auto;
}
.shell {
grid-template-columns: 1fr;
height: auto;
padding: 10px;
}
.content {
min-height: auto;
}
.sidebar {
gap: 14px;
}
.filters,
.stats {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
.server-grid {
grid-template-columns: minmax(0, 1fr);
overflow: visible;
}
.topline,
.field-row {
align-items: stretch;
flex-direction: column;
}
h2 {
font-size: 24px;
}
}
@media (max-width: 480px) {
.gpu-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
}
This diff is collapsed.
@echo off
rem Windows launcher for the GPU/DCU dashboard: checks for Node.js, makes sure
rem the data directory exists, opens the dashboard URL and runs "npm start".
setlocal
rem Work from the directory that contains this script, whatever the caller's
rem current directory or drive is.
cd /d "%~dp0"
rem Stop early with install instructions when node is not on PATH.
where node >nul 2>nul
if errorlevel 1 (
echo Node.js was not found.
echo Please install Node.js 18 or newer from https://nodejs.org/
echo.
pause
exit /b 1
)
rem Create the data directory on first run.
if not exist data mkdir data
echo Starting GPU/DCU resource dashboard...
echo Open http://localhost:3066 in your browser.
echo Press Ctrl+C in this window to stop the dashboard.
echo.
rem The URL is opened before the server is started, so the first page load may
rem need a manual reload once the server is up.
start "" "http://localhost:3066"
rem Runs in the foreground; this window stays attached to the server process.
npm start
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment