Commit b632c3a7 authored by liming6's avatar liming6
Browse files

feature 添加提取dcu信息的代码

parent 7923c563
...@@ -31,7 +31,7 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能 ...@@ -31,7 +31,7 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能
- docker容器 - docker容器
- 主机进程使用 - 主机进程使用
对于docker容器,我们认为谁创建了容器,那么该容器中的进程如何使用了显卡,就认为是容器的创建者使用了显卡 对于docker容器,我们认为谁创建了容器,那么该容器中的进程使用了显卡,就认为是容器的创建者使用了显卡
对于主机进程,那就是进程的用户使用了显卡 对于主机进程,那就是进程的用户使用了显卡
复杂情况: 复杂情况:
...@@ -39,7 +39,6 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能 ...@@ -39,7 +39,6 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能
- sudo转换用户执行 - sudo转换用户执行
- su转换用户 - su转换用户
思路或方法: 思路或方法:
- docker - docker
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
待办清单: 待办清单:
- 解析/etc/passwd文件,解析Linux系统用户和家目录 - [x] 解析/etc/passwd文件,解析Linux系统用户和家目录
- 解析docker容器相关信息,尝试找出启动容器的用户 - [ ] 解析docker容器相关信息,尝试找出启动容器的用户
- 对于占用tty的命令,查询tty用户 - [ ] 使用包装命令,记录容器和系统用户的对应关系
- 对于非交互式的命令,??? - [ ] 添加一种docker top命令查找进程的容器归属的方法
- [ ] 调研一种golang tui库
package types package types
// ProbeResult 探针的探测结果 // ProcessInfo 进程信息
type ProbeResult struct { type ProcessInfo struct {
NvidiaGPUS string Pid int32
DCUS string User string
NodeInfo string GPUMemUsed uint64
CPUUsed float32
MemUsed float32
RunTime string
Cmdline string
} }
...@@ -2,6 +2,7 @@ package docker ...@@ -2,6 +2,7 @@ package docker
import ( import (
"get-container/utils" "get-container/utils"
"strconv"
"context" "context"
"errors" "errors"
...@@ -17,8 +18,9 @@ import ( ...@@ -17,8 +18,9 @@ import (
/** /**
有两种方法获取进程属于哪个容器 有两种方法获取进程属于哪个容器
1. 通过查询pid命空间 1. 通过查询pid命空间,仅在没有指定--pid参数时有效
2. 通过查询进程的cgroup 2. 通过查询进程的cgroup
3. 使用docker top <container-id>匹配
*/ */
type FindCIDMethod string type FindCIDMethod string
...@@ -26,6 +28,7 @@ type FindCIDMethod string ...@@ -26,6 +28,7 @@ type FindCIDMethod string
const ( const (
ByCgroup FindCIDMethod = "byCGroup" ByCgroup FindCIDMethod = "byCGroup"
ByPidNS FindCIDMethod = "byPidNS" ByPidNS FindCIDMethod = "byPidNS"
ByTop FindCIDMethod = "byTop"
) )
var ( var (
...@@ -38,17 +41,84 @@ type ContainersInfo struct { ...@@ -38,17 +41,84 @@ type ContainersInfo struct {
time time.Time // 记录写入Info的时间 time time.Time // 记录写入Info的时间
inspectInfo map[string]container.InspectResponse inspectInfo map[string]container.InspectResponse
listInfo map[string]container.Summary listInfo map[string]container.Summary
topInfo map[string]container.TopResponse
}
// ContainerPsInfo holds the process fields that ParsePsInfo extracts from a
// container's top response.
type ContainerPsInfo struct {
	// Pid is the value of the "pid" column, parsed as an unsigned decimal integer.
	Pid uint64
	// Ppid is the value of the "ppid" column, parsed as an unsigned decimal integer.
	Ppid uint64
	// Uid is the raw text of the "uid" column.
	Uid string
	// Cmd carries command data taken from the top response's process table.
	Cmd []string
}
func ParsePsInfo(topInfo map[string]container.TopResponse) (map[string]ContainerPsInfo, error) {
if topInfo == nil {
return nil, errors.New("topInfo is nil")
}
result := make(map[string]ContainerPsInfo)
indexMap, t := make(map[string]int), 0
for cid, topResp := range topInfo {
for index, key := range topResp.Titles {
switch strings.TrimSpace(strings.ToLower(key)) {
case "pid":
indexMap[key] = index
t++
break
case "ppid":
indexMap[key] = index
t++
break
case "uid":
indexMap[key] = index
t++
break
case "cmd":
indexMap[key] = index
t++
break
default:
break
}
if t >= 4 {
break
}
}
item := ContainerPsInfo{}
if v, ok := indexMap["pid"]; ok {
pid, err := strconv.ParseUint(topResp.Processes[v][0], 10, 64)
if err != nil {
return nil, err
}
item.Pid = pid
}
if v, ok := indexMap["ppid"]; ok {
ppid, err := strconv.ParseUint(topResp.Processes[v][0], 10, 64)
if err != nil {
return nil, err
}
item.Ppid = ppid
}
if v, ok := indexMap["uid"]; ok {
item.Uid = topResp.Processes[v][0]
}
if v, ok := indexMap["cmd"]; ok {
item.Cmd = topResp.Processes[v]
}
result[cid] = item
}
return result, nil
} }
func (info *ContainersInfo) Update() error { func (info *ContainersInfo) Update() error {
info.lock.Lock() info.lock.Lock()
defer info.lock.Unlock() defer info.lock.Unlock()
i, s, err := getContainerInfo() i, s, t, err := getContainerInfo()
if err != nil { if err != nil {
return err return err
} }
info.inspectInfo = i info.inspectInfo = i
info.listInfo = s info.listInfo = s
info.topInfo = t
info.time = time.Now() info.time = time.Now()
return nil return nil
} }
...@@ -64,7 +134,7 @@ func init() { ...@@ -64,7 +134,7 @@ func init() {
} }
func initContainerInfo() error { func initContainerInfo() error {
inspect, lists, err := getContainerInfo() inspect, lists, tops, err := getContainerInfo()
if err != nil { if err != nil {
return err return err
} }
...@@ -73,6 +143,7 @@ func initContainerInfo() error { ...@@ -73,6 +143,7 @@ func initContainerInfo() error {
time: time.Now(), time: time.Now(),
inspectInfo: inspect, inspectInfo: inspect,
listInfo: lists, listInfo: lists,
topInfo: tops,
} }
return nil return nil
} }
...@@ -225,27 +296,33 @@ func findContainerIdByNSBatch(pids []uint64) (map[uint64]string, error) { ...@@ -225,27 +296,33 @@ func findContainerIdByNSBatch(pids []uint64) (map[uint64]string, error) {
} }
// getContainerInfo 获取所有正在运行的docker容器的详细信息 // getContainerInfo 获取所有正在运行的docker容器的详细信息
func getContainerInfo() (map[string]container.InspectResponse, map[string]container.Summary, error) { func getContainerInfo() (map[string]container.InspectResponse, map[string]container.Summary, map[string]container.TopResponse, error) {
cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil { if err != nil {
return nil, nil, err return nil, nil, nil, err
} }
defer func() { defer func() {
_ = cli.Close() _ = cli.Close()
}() }()
containerSum, err := cli.ContainerList(context.Background(), client.ContainerListOptions{All: false}) containerSum, err := cli.ContainerList(context.Background(), client.ContainerListOptions{All: false})
if err != nil { if err != nil {
return nil, nil, err return nil, nil, nil, err
} }
inspects := make(map[string]container.InspectResponse) inspects := make(map[string]container.InspectResponse)
lists := make(map[string]container.Summary) lists := make(map[string]container.Summary)
tops := make(map[string]container.TopResponse)
for _, c := range containerSum { for _, c := range containerSum {
inspect, innerErr := cli.ContainerInspect(context.Background(), c.ID) inspect, innerErr := cli.ContainerInspect(context.Background(), c.ID)
if innerErr != nil { if innerErr != nil {
return nil, nil, innerErr return nil, nil, nil, innerErr
} }
inspects[c.ID] = inspect inspects[c.ID] = inspect
lists[c.ID] = c lists[c.ID] = c
topInfo, innerErr := cli.ContainerTop(context.Background(), c.ID, nil)
if innerErr != nil {
return nil, nil, nil, innerErr
}
tops[c.ID] = topInfo
} }
return inspects, lists, nil return inspects, lists, tops, nil
} }
...@@ -3,6 +3,8 @@ module get-container ...@@ -3,6 +3,8 @@ module get-container
go 1.24.2 go 1.24.2
require ( require (
github.com/charmbracelet/bubbletea v1.3.10
github.com/charmbracelet/lipgloss v1.1.0
github.com/moby/moby/api v1.52.0-beta.2 github.com/moby/moby/api v1.52.0-beta.2
github.com/moby/moby/client v0.1.0-beta.2 github.com/moby/moby/client v0.1.0-beta.2
github.com/shirou/gopsutil/v4 v4.25.9 github.com/shirou/gopsutil/v4 v4.25.9
...@@ -11,29 +13,45 @@ require ( ...@@ -11,29 +13,45 @@ require (
require ( require (
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
github.com/charmbracelet/x/ansi v0.10.1 // indirect
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
github.com/charmbracelet/x/term v0.2.1 // indirect
github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/distribution/reference v0.6.0 // indirect github.com/distribution/reference v0.6.0 // indirect
github.com/docker/go-connections v0.6.0 // indirect github.com/docker/go-connections v0.6.0 // indirect
github.com/docker/go-units v0.5.0 // indirect github.com/docker/go-units v0.5.0 // indirect
github.com/ebitengine/purego v0.9.0 // indirect github.com/ebitengine/purego v0.9.0 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-ole/go-ole v1.2.6 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/term v0.5.2 // indirect github.com/moby/term v0.5.2 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/tklauser/go-sysconf v0.3.15 // indirect github.com/tklauser/go-sysconf v0.3.15 // indirect
github.com/tklauser/numcpus v0.10.0 // indirect github.com/tklauser/numcpus v0.10.0 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
go.opentelemetry.io/otel v1.35.0 // indirect go.opentelemetry.io/otel v1.35.0 // indirect
go.opentelemetry.io/otel/metric v1.35.0 // indirect go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect go.opentelemetry.io/otel/trace v1.35.0 // indirect
golang.org/x/sys v0.35.0 // indirect golang.org/x/sys v0.36.0 // indirect
golang.org/x/text v0.3.8 // indirect
) )
...@@ -5,17 +5,409 @@ import ( ...@@ -5,17 +5,409 @@ import (
"get-container/utils" "get-container/utils"
"os/exec" "os/exec"
"regexp" "regexp"
"sort"
"strconv" "strconv"
"strings" "strings"
) )
// Command name and table column titles related to the hy-smi tool.
const (
	// DCUBinaryFile is the executable name passed to exec.Command for every DCU query.
	DCUBinaryFile = "hy-smi"
	// The remaining constants are column titles.
	// NOTE(review): presumably the header row of `hy-smi --showpids`; the code
	// that consumes them is not visible in this section — confirm.
	PIDHeader = "PID"
	PASIDHeader = "PASID"
	HCUNodeHeader = "HCU Node(Include CPU sort)"
	HCUIndexHeader = "HCU Index"
	GPUIDHeader = "GPUID"
	PCIBusHeader = "PCI BUS"
	VRamUsedHeader = "VRAM USED(MiB)"
	VRamUsedPercentHeader = "VRAM USED(%)"
	SDMAUsedHeader = "SDMA USED"
)
// Regular expressions used to parse hy-smi output. They are compiled once at
// package scope. Patterns starting with (?mi) are multi-line (^ and $ match at
// line boundaries) and case-insensitive, so they can be applied to a block of
// several lines at once.
var (
	// ReEmptyLine matches a line consisting only of whitespace.
	ReEmptyLine = regexp.MustCompile(`^\s*$`)
	// ReUselessLine matches decoration lines that start and end with '=' and
	// contain only '=', spaces, letters and digits in between.
	ReUselessLine = regexp.MustCompile(`^=[ =a-zA-Z0-9]*=$`)
	// ReInfoHeader matches "HCU[<id>] : <text>"; group 1 is the id, group 2 the text.
	ReInfoHeader = regexp.MustCompile(`^HCU\[(\d+)]\s+:\s+(.*)$`)
	// Per-field patterns. Unless noted otherwise, group 1 is the field value.
	ReDriId = regexp.MustCompile(`(?mi)^Device\s*ID\s*:\s*([x0-9]*)$`)
	ReVBIOSVersion = regexp.MustCompile(`(?mi)^VBIOS\s*version\s*:\s*([0-9a-zA-Z.]*)$`)
	ReTempEdge = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*edge\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	ReTempJunction = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*junction\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	ReTempMem = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*mem\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	ReTempCore = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*core\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	// Clock patterns: group 1 is the level, group 2 the text inside the parentheses.
	ReFClk = regexp.MustCompile(`(?mi)^fclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	ReMClk = regexp.MustCompile(`(?mi)^mclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	ReSClk = regexp.MustCompile(`(?mi)^sclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	ReSOCClk = regexp.MustCompile(`(?mi)^socclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	// RePCIClk has four groups: level, then three values inside the parentheses.
	// NOTE(review): unlike the other clock patterns there is no ':' after
	// "level" — confirm against real hy-smi output.
	RePCIClk = regexp.MustCompile(`(?mi)^pcie\s*clock\s*level\s*([0-9]*)\s*\(([0-9.a-zA-Z/]*)\s*,\s*([x0-9]*)\s([0-9a-zA-Z]*)\)$`)
	RePreLevel = regexp.MustCompile(`(?mi)^Performance\s*Level\s*:\s*([0-9a-zA-Z]*)$`)
	// Power patterns: group 1 is the text inside the parentheses, group 2 the value.
	ReMaxPwr = regexp.MustCompile(`(?mi)^Max\s*Graphics\s*Package\s*Power\s*\((.*)\)\s*:\s*([0-9.]*)$`)
	ReAvgPwr = regexp.MustCompile(`(?mi)^Average\s*Graphics\s*Package\s*Power\s*\((.*)\)\s*:\s*([0-9.]*)$`)
	ReHCUUsage = regexp.MustCompile(`(?mi)^HCU\s*use\s*\(.*\)\s*:\s*([0-9.]*)$`)
	ReHCUMemUsage = regexp.MustCompile(`(?mi)^HCU\s*memory\s*use\s*\(.*\)\s*:\s*([0-9.]*)$`)
	ReHCUMemVendor = regexp.MustCompile(`(?mi)^HCU\s*Memory\s*Vendor\s*:\s*(.*)$`)
	// RePCIeRelay matches the "PCIe Replay Count" line despite the "Relay" spelling in its name.
	RePCIeRelay = regexp.MustCompile(`(?mi)^PCIe\s*Replay\s*Count\s*:\s*([0-9]*)$`)
	ReSerialNum = regexp.MustCompile(`(?mi)^Serial\s*Number\s*:\s*([0-9a-zA-Z]*)$`)
	// ReVoltage: group 1 is the text inside the parentheses, group 2 the value.
	ReVoltage = regexp.MustCompile(`(?mi)^Voltage\s*\((.*)\)\s*:\s*([0-9.]*)$`)
	// RePCIBus has a single capture group holding the bus address.
	RePCIBus = regexp.MustCompile(`(?mi)^PCI\s*Bus\s*:\s*([0-9a-zA-Z.:]*)$`)
	ReMECFWVersion = regexp.MustCompile(`(?mi)^MEC\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReMEC2FWVersion = regexp.MustCompile(`(?mi)^MEC2\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReRLCFWVersion = regexp.MustCompile(`(?mi)^RLC\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReSDMAFWVersion = regexp.MustCompile(`(?mi)^SDMA\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReSDMA2FWVersion = regexp.MustCompile(`(?mi)^SDMA2\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReSMCFWVersion = regexp.MustCompile(`(?mi)^SMC\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	// ReCardSerial matches the "Card Series" line despite the "Serial" in its name.
	ReCardSerial = regexp.MustCompile(`(?mi)^Card\s*Series\s*:\s*(.*)$`)
	ReCardVendor = regexp.MustCompile(`(?mi)^Card\s*Vendor\s*:\s*(.*)$`)
)
type HYVersionInfo struct { type HYVersionInfo struct {
SMIVersion string SMIVersion string // --version
LibVersion string DriverVersion string // --showdriverversion
DriverVersion string }
func GetHYVersionInfo() (*HYVersionInfo, error) {
versionBytes, err := exec.Command(DCUBinaryFile, "--version").Output()
if err != nil {
return nil, err
}
driBytes, err := exec.Command(DCUBinaryFile, "--showdriverversion").Output()
if err != nil {
return nil, err
}
return &HYVersionInfo{
SMIVersion: strings.Trim(strings.TrimSpace(string(versionBytes)), "\n"),
DriverVersion: strings.Trim(strings.TrimSpace(string(driBytes)), "\n"),
}, nil
} }
type DCUInfo struct { type DCUInfo struct {
Id int // id
Name string // DCU名称
PerformanceLevel string // 性能等级
FanSpeed float32 // 风扇转速
Temperature float32 // 平均温图
PwrUsage int16
PwrCapacity int16
BusId string
MemTotal int32
MemUsed int32
}
// ClockInfo describes one clock domain reported by hy-smi.
type ClockInfo struct {
	// Level is the clock level.
	Level int
	// Freq is the frequency, kept as the text reported by the tool.
	Freq string
}
// PcieClockInfo describes the PCIe clock reported by hy-smi.
type PcieClockInfo struct {
	// Level is the clock level.
	Level int
	// Freq is the frequency, kept as the text reported by the tool.
	Freq string
	// BandWidth is the bandwidth, kept as text.
	BandWidth string
	// Times is the multiplier, kept as text.
	Times string
}
// SMIAllOutput holds the information printed by `hy-smi -a`, which lists
// comprehensive details for one DCU. Fields whose line is absent from the
// output, or whose value cannot be parsed, keep their zero value.
type SMIAllOutput struct {
	// Id is the device index taken from the "HCU[<id>]" line prefix.
	Id int
	DeviceId string
	VBIOSVersion string
	// Temperatures of the edge, junction, mem and core sensors.
	TempEdge float32
	TempJunction float32
	TempMem float32
	TempCores float32
	// Clock domains; a pointer is nil when the corresponding line was not found.
	FClock *ClockInfo
	MClock *ClockInfo
	SClock *ClockInfo
	SOCClock *ClockInfo
	PCIEClock *PcieClockInfo
	// PerLevel is the "Performance Level" value.
	PerLevel string
	// MaxPwr and AvgPwr come from the "Max/Average Graphics Package Power" lines.
	MaxPwr float32
	AvgPwr float32
	// HCUUsage and HCUMemUsage come from the "HCU use" and "HCU memory use" lines.
	HCUUsage float32
	HCUMemUsage float32
	HCUMemVendor string
	// PCIERelayCount comes from the "PCIe Replay Count" line.
	PCIERelayCount int
	SerialNumber string
	Voltage float32
	PCIBus string
	// Firmware versions.
	MECFWVersion string
	MEC2FWVersion string
	RLCFWVersion string
	SDMAVersion string
	SDMA2Version string
	SMCVersion string
	// CardSeries comes from the "Card Series" line.
	CardSeries string
	CardVendor string
}
// GetSMIAllOutput runs `hy-smi -a` and returns one SMIAllOutput per device,
// sorted by device id in ascending order.
//
// Every output line of the form "HCU[<id>] : <text>" contributes <text> to
// device <id>; decoration lines, blank lines and lines without that prefix are
// skipped. The collected lines of each device are joined with newlines and
// passed to parseSMIAllOutput.
//
// An error is returned when the command fails, when a device id is not a valid
// integer, or when parseSMIAllOutput reports an error.
func GetSMIAllOutput() ([]*SMIAllOutput, error) {
	b, err := exec.Command(DCUBinaryFile, "-a").Output()
	if err != nil {
		return nil, err
	}
	info := make(map[int][]string)
	for _, line := range strings.Split(strings.TrimSpace(string(b)), "\n") {
		if ReUselessLine.MatchString(line) || ReEmptyLine.MatchString(line) {
			continue
		}
		// Match and capture on the same normalized text so that the test and
		// the extraction cannot disagree.
		normalized := strings.TrimSpace(strings.ReplaceAll(line, "\t", " "))
		fields := ReInfoHeader.FindStringSubmatch(normalized)
		// fields[0] is the whole match, fields[1] the id, fields[2] the text.
		if len(fields) < 3 {
			continue
		}
		id, innerErr := strconv.Atoi(fields[1])
		if innerErr != nil {
			return nil, innerErr
		}
		// append handles the missing key: a nil slice is a valid empty slice.
		info[id] = append(info[id], fields[2])
	}
	result := make([]*SMIAllOutput, 0, len(info))
	for k, v := range info {
		item, innerErr := parseSMIAllOutput(k, strings.Join(v, "\n"))
		if innerErr != nil {
			return nil, innerErr
		}
		if item != nil {
			result = append(result, item)
		}
	}
	// Map iteration order is random; sort by id for a stable result.
	sort.Slice(result, func(i, j int) bool {
		return result[i].Id < result[j].Id
	})
	return result, nil
}
// parseSMIAllOutput builds an SMIAllOutput for device id from str, the
// newline-joined "key: value" lines that `hy-smi -a` printed for that device.
//
// Each field is extracted with its package-level regular expression. A field
// whose line is missing, or whose numeric value cannot be parsed, is left at
// its zero value (nil for the clock pointers); such problems are not reported
// as errors. When str is empty or whitespace only, (nil, nil) is returned.
// The returned error is currently always nil.
func parseSMIAllOutput(id int, str string) (*SMIAllOutput, error) {
	if len(strings.TrimSpace(str)) == 0 {
		return nil, nil
	}

	// text copies capture group `group` of reg into dst when reg matches.
	text := func(dst *string, reg *regexp.Regexp, group int) {
		if s := regMatch(reg, str, group); s != nil {
			*dst = s[0]
		}
	}
	// float parses capture group `group` of reg as a float32 into dst; dst is
	// left untouched when reg does not match or the value is not a number.
	float := func(dst *float32, reg *regexp.Regexp, group int) {
		if s := regMatch(reg, str, group); s != nil {
			if v, err := strconv.ParseFloat(strings.TrimSpace(s[0]), 32); err == nil {
				*dst = float32(v)
			}
		}
	}
	// clock reads a "<level> (<freq>)" clock line; it returns nil when reg
	// does not match. An unparsable level leaves Level at 0.
	clock := func(reg *regexp.Regexp) *ClockInfo {
		s := regMatch(reg, str, 1, 2)
		if s == nil {
			return nil
		}
		c := ClockInfo{Freq: strings.TrimSpace(s[1])}
		if level, err := strconv.Atoi(strings.TrimSpace(s[0])); err == nil {
			c.Level = level
		}
		return &c
	}

	result := SMIAllOutput{}
	result.Id = id

	text(&result.DeviceId, ReDriId, 1)
	text(&result.VBIOSVersion, ReVBIOSVersion, 1)

	float(&result.TempEdge, ReTempEdge, 1)
	float(&result.TempJunction, ReTempJunction, 1)
	float(&result.TempMem, ReTempMem, 1)
	float(&result.TempCores, ReTempCore, 1)

	result.FClock = clock(ReFClk)
	result.MClock = clock(ReMClk)
	result.SClock = clock(ReSClk)
	// SOCClock is filled from the socclk line only; the PCIe line has its own
	// field below and must not overwrite it.
	result.SOCClock = clock(ReSOCClk)

	if s := regMatch(RePCIClk, str, 1, 2, 3, 4); s != nil {
		c := PcieClockInfo{}
		level, err := strconv.Atoi(strings.TrimSpace(s[0]))
		if err == nil {
			c.Level = level
		}
		c.BandWidth = strings.TrimSpace(s[1])
		c.Times = strings.TrimSpace(s[2])
		c.Freq = strings.TrimSpace(s[3])
		result.PCIEClock = &c
	}

	text(&result.PerLevel, RePreLevel, 1)

	// In the power and voltage patterns group 1 is the unit in parentheses and
	// group 2 the numeric value.
	float(&result.MaxPwr, ReMaxPwr, 2)
	float(&result.AvgPwr, ReAvgPwr, 2)
	float(&result.HCUUsage, ReHCUUsage, 1)
	float(&result.HCUMemUsage, ReHCUMemUsage, 1)
	text(&result.HCUMemVendor, ReHCUMemVendor, 1)

	if s := regMatch(RePCIeRelay, str, 1); s != nil {
		i, err := strconv.Atoi(strings.TrimSpace(s[0]))
		if err == nil {
			result.PCIERelayCount = i
		}
	}

	text(&result.SerialNumber, ReSerialNum, 1)
	float(&result.Voltage, ReVoltage, 2)
	// RePCIBus has a single capture group, so the address is group 1.
	text(&result.PCIBus, RePCIBus, 1)

	text(&result.MECFWVersion, ReMECFWVersion, 1)
	text(&result.MEC2FWVersion, ReMEC2FWVersion, 1)
	text(&result.RLCFWVersion, ReRLCFWVersion, 1)
	text(&result.SDMAVersion, ReSDMAFWVersion, 1)
	text(&result.SDMA2Version, ReSDMA2FWVersion, 1)
	text(&result.SMCVersion, ReSMCFWVersion, 1)

	text(&result.CardSeries, ReCardSerial, 1)
	text(&result.CardVendor, ReCardVendor, 1)

	return &result, nil
}
// DCURunningInfo holds the running-state information of one DCU, as parsed
// from one row of the hy-smi summary table by parseRunningInfo.
type DCURunningInfo struct {
	// Id is the device index (first column).
	Id int
	// Temp is the second column with its trailing "C" removed.
	Temp float32
	// AvgPower is the third column with its trailing "W" removed.
	AvgPower float32
	// PerformanceLevel is the fourth column, kept as text.
	PerformanceLevel string
	// MemPerc is the sixth column with its trailing "%" removed.
	MemPerc float32
	// HCUPerc is the seventh column with its trailing "%" removed.
	HCUPerc float32
}
// GetRunningInfo runs hy-smi without arguments and parses its standard output
// into one DCURunningInfo per device row. A command failure is returned
// unchanged; parsing errors come from parseRunningInfo.
func GetRunningInfo() ([]DCURunningInfo, error) {
	raw, err := exec.Command(DCUBinaryFile).Output()
	if err != nil {
		return nil, err
	}
	table := string(raw)
	return parseRunningInfo(table)
}
func parseRunningInfo(info string) ([]DCURunningInfo, error) {
lines := strings.Split(strings.Trim(strings.TrimSpace(info), "\n"), "\n")
result := make([]DCURunningInfo, 0)
for _, line := range lines {
if ReUselessLine.MatchString(line) || ReEmptyLine.MatchString(line) {
continue
}
fields := strings.Fields(strings.TrimSpace(line))
if len(fields) < 8 {
continue
}
item := DCURunningInfo{}
id, err := strconv.Atoi(fields[0])
if err != nil {
continue
}
item.Id = id
temp, err := strconv.ParseFloat(strings.TrimSuffix(strings.ToLower(fields[1]), "c"), 32)
if err != nil {
return nil, err
}
item.Temp = float32(temp)
avgPwr, err := strconv.ParseFloat(strings.TrimSuffix(strings.ToLower(fields[2]), "w"), 32)
if err != nil {
return nil, err
}
item.AvgPower = float32(avgPwr)
item.PerformanceLevel = fields[3]
vram, err := strconv.ParseFloat(strings.TrimSuffix(fields[5], "%"), 32)
if err != nil {
return nil, err
}
item.MemPerc = float32(vram)
utl, err := strconv.ParseFloat(strings.TrimSuffix(fields[6], "%"), 32)
if err != nil {
return nil, err
}
item.HCUPerc = float32(utl)
result = append(result, item)
}
return result, nil
} }
type DCUPidInfo struct { type DCUPidInfo struct {
...@@ -30,26 +422,9 @@ type DCUPidInfo struct { ...@@ -30,26 +422,9 @@ type DCUPidInfo struct {
SDMAUsed int SDMAUsed int
} }
var (
ReEmptyLine = regexp.MustCompile(`^\s*$`)
ReUselessLine = regexp.MustCompile(`^=[ =a-zA-Z0-9]*=$`)
)
const (
PIDHeader = "PID"
PASIDHeader = "PASID"
HCUNodeHeader = "HCU Node(Include CPU sort)"
HCUIndexHeader = "HCU Index"
GPUIDHeader = "GPUID"
PCIBusHeader = "PCI BUS"
VRamUsedHeader = "VRAM USED(MiB)"
VRamUsedPercentHeader = "VRAM USED(%)"
SDMAUsedHeader = "SDMA USED"
)
// GetDCUPidInfo 获取Pid相关信息 // GetDCUPidInfo 获取Pid相关信息
func GetDCUPidInfo() ([]DCUPidInfo, error) { func GetDCUPidInfo() ([]DCUPidInfo, error) {
output, err := exec.Command("hy-smi", "--showpids").Output() output, err := exec.Command(DCUBinaryFile, "--showpids").Output()
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -146,3 +521,20 @@ func parseDCUPidInfo(s string) ([]DCUPidInfo, error) { ...@@ -146,3 +521,20 @@ func parseDCUPidInfo(s string) ([]DCUPidInfo, error) {
} }
return result, nil return result, nil
} }
// regMatch applies reg to s and returns the requested submatches in the order
// given by index (0 is the whole match, 1 the first capture group, and so on).
//
// It returns nil when reg does not match s or when any requested index is
// negative or beyond the number of groups. With no indexes requested and a
// successful match, the result is an empty, non-nil slice.
func regMatch(reg *regexp.Regexp, s string, index ...int) []string {
	groups := reg.FindStringSubmatch(s)
	if groups == nil {
		return nil
	}
	picked := make([]string, 0, len(index))
	for _, want := range index {
		if want < 0 || want >= len(groups) {
			// One invalid index invalidates the whole request.
			return nil
		}
		picked = append(picked, groups[want])
	}
	return picked
}
...@@ -2,7 +2,12 @@ package gpu ...@@ -2,7 +2,12 @@ package gpu
import ( import (
"encoding/json" "encoding/json"
"os"
"sort"
"strconv"
"strings"
"testing" "testing"
"time"
) )
const ( const (
...@@ -206,3 +211,80 @@ func TestParseDCUPidInfo(t *testing.T) { ...@@ -206,3 +211,80 @@ func TestParseDCUPidInfo(t *testing.T) {
t.Logf("%+v\n", info) t.Logf("%+v\n", info)
} }
} }
const DCUPidInfoStr = `
============================ System Management Interface =============================
======================================================================================
HCU Temp AvgPwr Perf PwrCap VRAM% HCU% Mode
0 34.0C 140.0W manual 800.0W 0% 0.0% Normal
1 35.0C 140.0W manual 800.0W 0% 0.0% Normal
2 35.0C 140.0W manual 800.0W 0% 0.0% Normal
3 33.0C 140.0W manual 800.0W 0% 0.0% Normal
4 36.0C 140.0W manual 800.0W 0% 0.0% Normal
5 36.0C 140.0W manual 800.0W 0% 0.0% Normal
6 34.0C 140.0W manual 800.0W 0% 0.0% Normal
7 34.0C 140.0W manual 800.0W 0% 0.0% Normal
======================================================================================
=================================== End of SMI Log ===================================
`
// TestParseRunningInfo feeds the captured hy-smi summary table in
// DCUPidInfoStr to parseRunningInfo and checks the parsed rows instead of only
// logging them.
func TestParseRunningInfo(t *testing.T) {
	rows, err := parseRunningInfo(DCUPidInfoStr)
	if err != nil {
		t.Fatal(err)
	}
	// The fixture lists devices 0 through 7, one row each.
	if len(rows) != 8 {
		t.Fatalf("got %d rows, want 8", len(rows))
	}
	for i, row := range rows {
		t.Logf("%+v\n", row)
		if row.Id != i {
			t.Errorf("row %d: Id = %d, want %d", i, row.Id, i)
		}
		// Every fixture row reports 140.0W and the "manual" performance level.
		if row.AvgPower != 140 {
			t.Errorf("row %d: AvgPower = %v, want 140", i, row.AvgPower)
		}
		if row.PerformanceLevel != "manual" {
			t.Errorf("row %d: PerformanceLevel = %q, want %q", i, row.PerformanceLevel, "manual")
		}
	}
}
// TestAbc parses the captured SMI dump in ../test-data/hy.log end to end.
//
// It groups the payload of every line matched by ReInfoHeader under the
// device id captured by that expression, passes each device's joined lines to
// parseSMIAllOutput, sorts the results by Id, and logs both the elapsed
// parsing time (in seconds) and every parsed record.
func TestAbc(t *testing.T) {
	b, e := os.ReadFile("../test-data/hy.log")
	if e != nil {
		// Without the fixture there is nothing to parse; stop instead of
		// running the rest of the test against empty input.
		t.Fatal(e)
	}
	timeStart := time.Now()
	lines := strings.Split(strings.TrimSpace(string(b)), "\n")
	info := make(map[int][]string)
	for _, line := range lines {
		if ReUselessLine.MatchString(line) || ReEmptyLine.MatchString(line) {
			continue
		}
		if !ReInfoHeader.MatchString(line) {
			continue
		}
		// Tabs are normalized to spaces before the capture groups are extracted.
		fields := ReInfoHeader.FindStringSubmatch(strings.TrimSpace(strings.ReplaceAll(line, "\t", " ")))
		if len(fields) <= 2 {
			continue
		}
		id, innerErr := strconv.Atoi(fields[1])
		if innerErr != nil {
			// Report the bad id but do not file the line under device 0.
			t.Error(innerErr)
			continue
		}
		// append on the nil slice of a missing key allocates as needed.
		info[id] = append(info[id], fields[2])
	}
	result := make([]*SMIAllOutput, 0, len(info))
	for k, v := range info {
		item, innerErr := parseSMIAllOutput(k, strings.Join(v, "\n"))
		if innerErr != nil {
			t.Error(innerErr)
		}
		if item != nil {
			result = append(result, item)
		}
	}
	// Map iteration order is random; sort so the log output is stable.
	sort.Slice(result, func(i, j int) bool {
		return result[i].Id < result[j].Id
	})
	t.Log(time.Since(timeStart).Seconds())
	for _, item := range result {
		t.Logf("%+v", *item)
	}
}
============================ System Management Interface =============================
======================================================================================
Driver Version: 6.3.2-V1.7.4
======================================================================================
======================================================================================
HCU[0] : Device ID: 0x6320
HCU[1] : Device ID: 0x6320
HCU[2] : Device ID: 0x6320
HCU[3] : Device ID: 0x6320
HCU[4] : Device ID: 0x6320
HCU[5] : Device ID: 0x6320
HCU[6] : Device ID: 0x6320
HCU[7] : Device ID: 0x6320
======================================================================================
======================================================================================
HCU[0] : VBIOS version: 5.717.002200A.685184
HCU[1] : VBIOS version: 5.717.002200A.685184
HCU[2] : VBIOS version: 5.717.002200A.685184
HCU[3] : VBIOS version: 5.717.002200A.685184
HCU[4] : VBIOS version: 5.717.002200A.685184
HCU[5] : VBIOS version: 5.717.002200A.685184
HCU[6] : VBIOS version: 5.717.002200A.685184
HCU[7] : VBIOS version: 5.717.002200A.685184
======================================================================================
======================================================================================
HCU[0] : Temperature (Sensor edge) (C): 36.0
HCU[0] : Temperature (Sensor junction) (C): 40.0
HCU[0] : Temperature (Sensor mem) (C): 31.0
HCU[0] : Temperature (Sensor core) (C): 34.0
HCU[1] : Temperature (Sensor edge) (C): 37.0
HCU[1] : Temperature (Sensor junction) (C): 40.0
HCU[1] : Temperature (Sensor mem) (C): 30.0
HCU[1] : Temperature (Sensor core) (C): 35.0
HCU[2] : Temperature (Sensor edge) (C): 37.0
HCU[2] : Temperature (Sensor junction) (C): 40.0
HCU[2] : Temperature (Sensor mem) (C): 29.0
HCU[2] : Temperature (Sensor core) (C): 35.0
HCU[3] : Temperature (Sensor edge) (C): 36.0
HCU[3] : Temperature (Sensor junction) (C): 39.0
HCU[3] : Temperature (Sensor mem) (C): 28.0
HCU[3] : Temperature (Sensor core) (C): 33.0
HCU[4] : Temperature (Sensor edge) (C): 38.0
HCU[4] : Temperature (Sensor junction) (C): 41.0
HCU[4] : Temperature (Sensor mem) (C): 32.0
HCU[4] : Temperature (Sensor core) (C): 36.0
HCU[5] : Temperature (Sensor edge) (C): 39.0
HCU[5] : Temperature (Sensor junction) (C): 42.0
HCU[5] : Temperature (Sensor mem) (C): 31.0
HCU[5] : Temperature (Sensor core) (C): 36.0
HCU[6] : Temperature (Sensor edge) (C): 37.0
HCU[6] : Temperature (Sensor junction) (C): 40.0
HCU[6] : Temperature (Sensor mem) (C): 29.0
HCU[6] : Temperature (Sensor core) (C): 34.0
HCU[7] : Temperature (Sensor edge) (C): 35.0
HCU[7] : Temperature (Sensor junction) (C): 40.0
HCU[7] : Temperature (Sensor mem) (C): 29.0
HCU[7] : Temperature (Sensor core) (C): 34.0
======================================================================================
======================================================================================
HCU[0] : fclk clock level: 0 (1500Mhz)
HCU[0] : mclk clock level: 0 (1500Mhz)
HCU[0] : sclk clock level: 9 (1500Mhz)
HCU[0] : socclk clock level: 0 (1100Mhz)
HCU[0] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[1] : fclk clock level: 0 (1500Mhz)
HCU[1] : mclk clock level: 0 (1500Mhz)
HCU[1] : sclk clock level: 9 (1500Mhz)
HCU[1] : socclk clock level: 0 (1100Mhz)
HCU[1] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[2] : fclk clock level: 0 (1500Mhz)
HCU[2] : mclk clock level: 0 (1500Mhz)
HCU[2] : sclk clock level: 9 (1500Mhz)
HCU[2] : socclk clock level: 0 (1100Mhz)
HCU[2] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[3] : fclk clock level: 0 (1500Mhz)
HCU[3] : mclk clock level: 0 (1500Mhz)
HCU[3] : sclk clock level: 9 (1500Mhz)
HCU[3] : socclk clock level: 0 (1100Mhz)
HCU[3] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[4] : fclk clock level: 0 (1500Mhz)
HCU[4] : mclk clock level: 0 (1500Mhz)
HCU[4] : sclk clock level: 9 (1500Mhz)
HCU[4] : socclk clock level: 0 (1100Mhz)
HCU[4] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[5] : fclk clock level: 0 (1500Mhz)
HCU[5] : mclk clock level: 0 (1500Mhz)
HCU[5] : sclk clock level: 9 (1500Mhz)
HCU[5] : socclk clock level: 0 (1100Mhz)
HCU[5] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[6] : fclk clock level: 0 (1500Mhz)
HCU[6] : mclk clock level: 0 (1500Mhz)
HCU[6] : sclk clock level: 9 (1500Mhz)
HCU[6] : socclk clock level: 0 (1100Mhz)
HCU[6] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[7] : fclk clock level: 0 (1500Mhz)
HCU[7] : mclk clock level: 0 (1500Mhz)
HCU[7] : sclk clock level: 9 (1500Mhz)
HCU[7] : socclk clock level: 0 (1100Mhz)
HCU[7] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
======================================================================================
======================================================================================
HCU[0] : Performance Level: manual
HCU[1] : Performance Level: manual
HCU[2] : Performance Level: manual
HCU[3] : Performance Level: manual
HCU[4] : Performance Level: manual
HCU[5] : Performance Level: manual
HCU[6] : Performance Level: manual
HCU[7] : Performance Level: manual
======================================================================================
======================================================================================
HCU[0] : Max Graphics Package Power (W): 800.0
HCU[1] : Max Graphics Package Power (W): 800.0
HCU[2] : Max Graphics Package Power (W): 800.0
HCU[3] : Max Graphics Package Power (W): 800.0
HCU[4] : Max Graphics Package Power (W): 800.0
HCU[5] : Max Graphics Package Power (W): 800.0
HCU[6] : Max Graphics Package Power (W): 800.0
HCU[7] : Max Graphics Package Power (W): 800.0
======================================================================================
======================================================================================
HCU[0] : Average Graphics Package Power (W): 140.0
HCU[1] : Average Graphics Package Power (W): 140.0
HCU[2] : Average Graphics Package Power (W): 140.0
HCU[3] : Average Graphics Package Power (W): 140.0
HCU[4] : Average Graphics Package Power (W): 140.0
HCU[5] : Average Graphics Package Power (W): 140.0
HCU[6] : Average Graphics Package Power (W): 140.0
HCU[7] : Average Graphics Package Power (W): 140.0
======================================================================================
======================================================================================
HCU[0] : Supported fclk frequencies on HCU 0
HCU[0] : 0: 1500Mhz *
HCU[0] :
HCU[0] : Supported mclk frequencies on HCU 0
HCU[0] : 0: 1500Mhz *
HCU[0] :
HCU[0] : Supported sclk frequencies on HCU 0
HCU[0] : 0: 300Mhz
HCU[0] : 1: 600Mhz
HCU[0] : 2: 800Mhz
HCU[0] : 3: 900Mhz
HCU[0] : 4: 1000Mhz
HCU[0] : 5: 1100Mhz
HCU[0] : 6: 1200Mhz
HCU[0] : 7: 1300Mhz
HCU[0] : 8: 1400Mhz
HCU[0] : 9: 1500Mhz *
HCU[0] : *: 1600Mhz
HCU[0] :
HCU[0] : Supported socclk frequencies on HCU 0
HCU[0] : 0: 1100Mhz *
HCU[0] :
HCU[0] : Supported pcie frequencies on HCU 0
HCU[0] : 0: 2.5GT/s, x16 300Mhz
HCU[0] : 1: 8.0GT/s, x16 625Mhz
HCU[0] : 2: 32.0GT/s, x16 1100Mhz *
HCU[0] :
HCU[1] : Supported fclk frequencies on HCU 1
HCU[1] : 0: 1500Mhz *
HCU[1] :
HCU[1] : Supported mclk frequencies on HCU 1
HCU[1] : 0: 1500Mhz *
HCU[1] :
HCU[1] : Supported sclk frequencies on HCU 1
HCU[1] : 0: 300Mhz
HCU[1] : 1: 600Mhz
HCU[1] : 2: 800Mhz
HCU[1] : 3: 900Mhz
HCU[1] : 4: 1000Mhz
HCU[1] : 5: 1100Mhz
HCU[1] : 6: 1200Mhz
HCU[1] : 7: 1300Mhz
HCU[1] : 8: 1400Mhz
HCU[1] : 9: 1500Mhz *
HCU[1] : *: 1600Mhz
HCU[1] :
HCU[1] : Supported socclk frequencies on HCU 1
HCU[1] : 0: 1100Mhz *
HCU[1] :
HCU[1] : Supported pcie frequencies on HCU 1
HCU[1] : 0: 2.5GT/s, x16 300Mhz
HCU[1] : 1: 8.0GT/s, x16 625Mhz
HCU[1] : 2: 32.0GT/s, x16 1100Mhz *
HCU[1] :
HCU[2] : Supported fclk frequencies on HCU 2
HCU[2] : 0: 1500Mhz *
HCU[2] :
HCU[2] : Supported mclk frequencies on HCU 2
HCU[2] : 0: 1500Mhz *
HCU[2] :
HCU[2] : Supported sclk frequencies on HCU 2
HCU[2] : 0: 300Mhz
HCU[2] : 1: 600Mhz
HCU[2] : 2: 800Mhz
HCU[2] : 3: 900Mhz
HCU[2] : 4: 1000Mhz
HCU[2] : 5: 1100Mhz
HCU[2] : 6: 1200Mhz
HCU[2] : 7: 1300Mhz
HCU[2] : 8: 1400Mhz
HCU[2] : 9: 1500Mhz *
HCU[2] : *: 1600Mhz
HCU[2] :
HCU[2] : Supported socclk frequencies on HCU 2
HCU[2] : 0: 1100Mhz *
HCU[2] :
HCU[2] : Supported pcie frequencies on HCU 2
HCU[2] : 0: 2.5GT/s, x16 300Mhz
HCU[2] : 1: 8.0GT/s, x16 625Mhz
HCU[2] : 2: 32.0GT/s, x16 1100Mhz *
HCU[2] :
HCU[3] : Supported fclk frequencies on HCU 3
HCU[3] : 0: 1500Mhz *
HCU[3] :
HCU[3] : Supported mclk frequencies on HCU 3
HCU[3] : 0: 1500Mhz *
HCU[3] :
HCU[3] : Supported sclk frequencies on HCU 3
HCU[3] : 0: 300Mhz
HCU[3] : 1: 600Mhz
HCU[3] : 2: 800Mhz
HCU[3] : 3: 900Mhz
HCU[3] : 4: 1000Mhz
HCU[3] : 5: 1100Mhz
HCU[3] : 6: 1200Mhz
HCU[3] : 7: 1300Mhz
HCU[3] : 8: 1400Mhz
HCU[3] : 9: 1500Mhz *
HCU[3] : *: 1600Mhz
HCU[3] :
HCU[3] : Supported socclk frequencies on HCU 3
HCU[3] : 0: 1100Mhz *
HCU[3] :
HCU[3] : Supported pcie frequencies on HCU 3
HCU[3] : 0: 2.5GT/s, x16 300Mhz
HCU[3] : 1: 8.0GT/s, x16 625Mhz
HCU[3] : 2: 32.0GT/s, x16 1100Mhz *
HCU[3] :
HCU[4] : Supported fclk frequencies on HCU 4
HCU[4] : 0: 1500Mhz *
HCU[4] :
HCU[4] : Supported mclk frequencies on HCU 4
HCU[4] : 0: 1500Mhz *
HCU[4] :
HCU[4] : Supported sclk frequencies on HCU 4
HCU[4] : 0: 300Mhz
HCU[4] : 1: 600Mhz
HCU[4] : 2: 800Mhz
HCU[4] : 3: 900Mhz
HCU[4] : 4: 1000Mhz
HCU[4] : 5: 1100Mhz
HCU[4] : 6: 1200Mhz
HCU[4] : 7: 1300Mhz
HCU[4] : 8: 1400Mhz
HCU[4] : 9: 1500Mhz *
HCU[4] : *: 1600Mhz
HCU[4] :
HCU[4] : Supported socclk frequencies on HCU 4
HCU[4] : 0: 1100Mhz *
HCU[4] :
HCU[4] : Supported pcie frequencies on HCU 4
HCU[4] : 0: 2.5GT/s, x16 300Mhz
HCU[4] : 1: 8.0GT/s, x16 625Mhz
HCU[4] : 2: 32.0GT/s, x16 1100Mhz *
HCU[4] :
HCU[5] : Supported fclk frequencies on HCU 5
HCU[5] : 0: 1500Mhz *
HCU[5] :
HCU[5] : Supported mclk frequencies on HCU 5
HCU[5] : 0: 1500Mhz *
HCU[5] :
HCU[5] : Supported sclk frequencies on HCU 5
HCU[5] : 0: 300Mhz
HCU[5] : 1: 600Mhz
HCU[5] : 2: 800Mhz
HCU[5] : 3: 900Mhz
HCU[5] : 4: 1000Mhz
HCU[5] : 5: 1100Mhz
HCU[5] : 6: 1200Mhz
HCU[5] : 7: 1300Mhz
HCU[5] : 8: 1400Mhz
HCU[5] : 9: 1500Mhz *
HCU[5] : *: 1600Mhz
HCU[5] :
HCU[5] : Supported socclk frequencies on HCU 5
HCU[5] : 0: 1100Mhz *
HCU[5] :
HCU[5] : Supported pcie frequencies on HCU 5
HCU[5] : 0: 2.5GT/s, x16 300Mhz
HCU[5] : 1: 8.0GT/s, x16 625Mhz
HCU[5] : 2: 32.0GT/s, x16 1100Mhz *
HCU[5] :
HCU[6] : Supported fclk frequencies on HCU 6
HCU[6] : 0: 1500Mhz *
HCU[6] :
HCU[6] : Supported mclk frequencies on HCU 6
HCU[6] : 0: 1500Mhz *
HCU[6] :
HCU[6] : Supported sclk frequencies on HCU 6
HCU[6] : 0: 300Mhz
HCU[6] : 1: 600Mhz
HCU[6] : 2: 800Mhz
HCU[6] : 3: 900Mhz
HCU[6] : 4: 1000Mhz
HCU[6] : 5: 1100Mhz
HCU[6] : 6: 1200Mhz
HCU[6] : 7: 1300Mhz
HCU[6] : 8: 1400Mhz
HCU[6] : 9: 1500Mhz *
HCU[6] : *: 1600Mhz
HCU[6] :
HCU[6] : Supported socclk frequencies on HCU 6
HCU[6] : 0: 1100Mhz *
HCU[6] :
HCU[6] : Supported pcie frequencies on HCU 6
HCU[6] : 0: 2.5GT/s, x16 300Mhz
HCU[6] : 1: 8.0GT/s, x16 625Mhz
HCU[6] : 2: 32.0GT/s, x16 1100Mhz *
HCU[6] :
HCU[7] : Supported fclk frequencies on HCU 7
HCU[7] : 0: 1500Mhz *
HCU[7] :
HCU[7] : Supported mclk frequencies on HCU 7
HCU[7] : 0: 1500Mhz *
HCU[7] :
HCU[7] : Supported sclk frequencies on HCU 7
HCU[7] : 0: 300Mhz
HCU[7] : 1: 600Mhz
HCU[7] : 2: 800Mhz
HCU[7] : 3: 900Mhz
HCU[7] : 4: 1000Mhz
HCU[7] : 5: 1100Mhz
HCU[7] : 6: 1200Mhz
HCU[7] : 7: 1300Mhz
HCU[7] : 8: 1400Mhz
HCU[7] : 9: 1500Mhz *
HCU[7] : *: 1600Mhz
HCU[7] :
HCU[7] : Supported socclk frequencies on HCU 7
HCU[7] : 0: 1100Mhz *
HCU[7] :
HCU[7] : Supported pcie frequencies on HCU 7
HCU[7] : 0: 2.5GT/s, x16 300Mhz
HCU[7] : 1: 8.0GT/s, x16 625Mhz
HCU[7] : 2: 32.0GT/s, x16 1100Mhz *
HCU[7] :
======================================================================================
======================================================================================
HCU[0] : HCU use (%): 0.0
HCU[1] : HCU use (%): 0.0
HCU[2] : HCU use (%): 0.0
HCU[3] : HCU use (%): 0.0
HCU[4] : HCU use (%): 0.0
HCU[5] : HCU use (%): 0.0
HCU[6] : HCU use (%): 0.0
HCU[7] : HCU use (%): 0.0
======================================================================================
======================================================================================
HCU[0] : HCU memory use (%): 0
HCU[1] : HCU memory use (%): 0
HCU[2] : HCU memory use (%): 0
HCU[3] : HCU memory use (%): 0
HCU[4] : HCU memory use (%): 0
HCU[5] : HCU memory use (%): 0
HCU[6] : HCU memory use (%): 0
HCU[7] : HCU memory use (%): 0
======================================================================================
======================================================================================
HCU[0] : HCU Memory Vendor: samsung
HCU[1] : HCU Memory Vendor: samsung
HCU[2] : HCU Memory Vendor: samsung
HCU[3] : HCU Memory Vendor: samsung
HCU[4] : HCU Memory Vendor: samsung
HCU[5] : HCU Memory Vendor: samsung
HCU[6] : HCU Memory Vendor: samsung
HCU[7] : HCU Memory Vendor: samsung
======================================================================================
======================================================================================
HCU[0] : PCIe Replay Count: 0
HCU[1] : PCIe Replay Count: 0
HCU[2] : PCIe Replay Count: 0
HCU[3] : PCIe Replay Count: 0
HCU[4] : PCIe Replay Count: 0
HCU[5] : PCIe Replay Count: 0
HCU[6] : PCIe Replay Count: 0
HCU[7] : PCIe Replay Count: 0
======================================================================================
======================================================================================
HCU[0] : Serial Number: T6V51408031001
HCU[1] : Serial Number: T6V51213060601
HCU[2] : Serial Number: T6V51609020301
HCU[3] : Serial Number: T6V51607010801
HCU[4] : Serial Number: T6V51409010501
HCU[5] : Serial Number: T6V51420070301
HCU[6] : Serial Number: T6V51401040101
HCU[7] : Serial Number: T6V51205080901
======================================================================================
======================================================================================
No KFD PIDs currently running!
======================================================================================
======================================================================================
HCU[0] : Voltage (mV): 956
HCU[1] : Voltage (mV): 956
HCU[2] : Voltage (mV): 956
HCU[3] : Voltage (mV): 956
HCU[4] : Voltage (mV): 956
HCU[5] : Voltage (mV): 956
HCU[6] : Voltage (mV): 956
HCU[7] : Voltage (mV): 956
======================================================================================
======================================================================================
HCU[0] : PCI Bus: 0000:9f:00.0
HCU[1] : PCI Bus: 0000:56:00.0
HCU[2] : PCI Bus: 0000:5d:00.0
HCU[3] : PCI Bus: 0000:05:00.0
HCU[4] : PCI Bus: 0000:e5:00.0
HCU[5] : PCI Bus: 0000:c1:00.0
HCU[6] : PCI Bus: 0000:ca:00.0
HCU[7] : PCI Bus: 0000:b1:00.0
======================================================================================
======================================================================================
HCU[0] : MEC Firmware Version: 42
HCU[0] : MEC2 Firmware Version: 42
HCU[0] : RLC Firmware Version: 1
HCU[0] : SDMA Firmware Version: 10
HCU[0] : SDMA2 Firmware Version: 10
HCU[0] : SMC Firmware Version: 00.00.32.01
HCU[1] : MEC Firmware Version: 42
HCU[1] : MEC2 Firmware Version: 42
HCU[1] : RLC Firmware Version: 1
HCU[1] : SDMA Firmware Version: 10
HCU[1] : SDMA2 Firmware Version: 10
HCU[1] : SMC Firmware Version: 00.00.32.01
HCU[2] : MEC Firmware Version: 42
HCU[2] : MEC2 Firmware Version: 42
HCU[2] : RLC Firmware Version: 1
HCU[2] : SDMA Firmware Version: 10
HCU[2] : SDMA2 Firmware Version: 10
HCU[2] : SMC Firmware Version: 00.00.32.01
HCU[3] : MEC Firmware Version: 42
HCU[3] : MEC2 Firmware Version: 42
HCU[3] : RLC Firmware Version: 1
HCU[3] : SDMA Firmware Version: 10
HCU[3] : SDMA2 Firmware Version: 10
HCU[3] : SMC Firmware Version: 00.00.32.01
HCU[4] : MEC Firmware Version: 42
HCU[4] : MEC2 Firmware Version: 42
HCU[4] : RLC Firmware Version: 1
HCU[4] : SDMA Firmware Version: 10
HCU[4] : SDMA2 Firmware Version: 10
HCU[4] : SMC Firmware Version: 00.00.32.01
HCU[5] : MEC Firmware Version: 42
HCU[5] : MEC2 Firmware Version: 42
HCU[5] : RLC Firmware Version: 1
HCU[5] : SDMA Firmware Version: 10
HCU[5] : SDMA2 Firmware Version: 10
HCU[5] : SMC Firmware Version: 00.00.32.01
HCU[6] : MEC Firmware Version: 42
HCU[6] : MEC2 Firmware Version: 42
HCU[6] : RLC Firmware Version: 1
HCU[6] : SDMA Firmware Version: 10
HCU[6] : SDMA2 Firmware Version: 10
HCU[6] : SMC Firmware Version: 00.00.32.01
HCU[7] : MEC Firmware Version: 42
HCU[7] : MEC2 Firmware Version: 42
HCU[7] : RLC Firmware Version: 1
HCU[7] : SDMA Firmware Version: 10
HCU[7] : SDMA2 Firmware Version: 10
HCU[7] : SMC Firmware Version: 00.00.32.01
======================================================================================
======================================================================================
HCU[0] : Card Series: BW200
HCU[0] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[1] : Card Series: BW200
HCU[1] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[2] : Card Series: BW200
HCU[2] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[3] : Card Series: BW200
HCU[3] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[4] : Card Series: BW200
HCU[4] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[5] : Card Series: BW200
HCU[5] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[6] : Card Series: BW200
HCU[6] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[7] : Card Series: BW200
HCU[7] : Card Vendor: C-3000 IC Design Co., Ltd.
======================================================================================
======================================================================================
HCU[0] : No Bad Page! (Type: all)
HCU[1] : No Bad Page! (Type: all)
HCU[2] : No Bad Page! (Type: all)
HCU[3] : No Bad Page! (Type: all)
HCU[4] : No Bad Page! (Type: all)
HCU[5] : No Bad Page! (Type: all)
HCU[6] : No Bad Page! (Type: all)
HCU[7] : No Bad Page! (Type: all)
======================================================================================
=================================== End of SMI Log ===================================
...@@ -60,3 +60,12 @@ func GetProcessByName(cmdline string) ([]*process.Process, error) { ...@@ -60,3 +60,12 @@ func GetProcessByName(cmdline string) ([]*process.Process, error) {
} }
return result, nil return result, nil
} }
// GetProcessCPUUsage returns the CPU usage of the process identified by pid,
// as reported by the process handle's CPUPercent method.
// If the process handle cannot be created, it returns 0 and that error.
func GetProcessCPUUsage(pid int32) (float64, error) {
	proc, lookupErr := process.NewProcess(pid)
	if lookupErr != nil {
		return 0, lookupErr
	}
	usage, usageErr := proc.CPUPercent()
	return usage, usageErr
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment