Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
liming6
dcu-process-montor
Commits
b632c3a7
Commit
b632c3a7
authored
Nov 04, 2025
by
liming6
Browse files
feature 添加提取dcu信息的代码
parent
7923c563
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1125 additions
and
42 deletions
+1125
-42
Readme.md
Readme.md
+1
-2
Todo.md
Todo.md
+5
-5
cmd/cmd-wrapper/docker-wrapper/parse-arg.go
cmd/cmd-wrapper/docker-wrapper/parse-arg.go
+0
-0
common/types/types.go
common/types/types.go
+9
-5
docker/find.go
docker/find.go
+85
-8
go.mod
go.mod
+19
-1
gpu/hy-smi.go
gpu/hy-smi.go
+413
-21
gpu/regexp_test.go
gpu/regexp_test.go
+82
-0
test-data/hy.log
test-data/hy.log
+502
-0
utils/pid.go
utils/pid.go
+9
-0
No files found.
Readme.md
View file @
b632c3a7
...
@@ -31,7 +31,7 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能
...
@@ -31,7 +31,7 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能
-
docker容器
-
docker容器
-
主机进程使用
-
主机进程使用
对于docker容器,我们认为谁创建了容器,那么该容器中的进程
如何
使用了显卡,就认为是容器的创建者使用了显卡
对于docker容器,我们认为谁创建了容器,那么该容器中的进程使用了显卡,就认为是容器的创建者使用了显卡
对于主机进程,那就是进程的用户使用了显卡
对于主机进程,那就是进程的用户使用了显卡
复杂情况:
复杂情况:
...
@@ -39,7 +39,6 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能
...
@@ -39,7 +39,6 @@ nvidia-dcgm-exporter和dcu-exporter收集的是一些数字指标信息,不能
-
sudo转换用户执行
-
sudo转换用户执行
-
su转换用户
-
su转换用户
思路或方法:
思路或方法:
-
docker
-
docker
...
...
Todo.md
View file @
b632c3a7
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
待办清单:
待办清单:
-
解析/etc/passwd文件,解析Linux系统用户和家目录
-
[x]
解析/etc/passwd文件,解析Linux系统用户和家目录
-
解析docker容器相关信息,尝试找出启动容器的用户
-
[ ]
解析docker容器相关信息,尝试找出启动容器的用户
-
对于占用tty的命令,查询tty用户
-
[ ] 使用包装命令,记录容器和系统用户的对应关系
-
对于非交互式的命令,???
-
[ ] 添加一种docker top命令查找进程的容器归属的方法
-
[ ] 调研一种golang tui库
cmd/
tool
/parse-arg.go
→
cmd/
cmd-wrapper/docker-wrapper
/parse-arg.go
View file @
b632c3a7
File moved
common/types/types.go
View file @
b632c3a7
package
types
package
types
// ProbeResult 探针的探测结果
// ProcessInfo 进程信息
type
ProbeResult
struct
{
type
ProcessInfo
struct
{
NvidiaGPUS
string
Pid
int32
DCUS
string
User
string
NodeInfo
string
GPUMemUsed
uint64
CPUUsed
float32
MemUsed
float32
RunTime
string
Cmdline
string
}
}
docker/find.go
View file @
b632c3a7
...
@@ -2,6 +2,7 @@ package docker
...
@@ -2,6 +2,7 @@ package docker
import
(
import
(
"get-container/utils"
"get-container/utils"
"strconv"
"context"
"context"
"errors"
"errors"
...
@@ -17,8 +18,9 @@ import (
...
@@ -17,8 +18,9 @@ import (
/**
/**
有两种方法获取进程属于哪个容器
有两种方法获取进程属于哪个容器
1. 通过查询pid命
令
空间
1. 通过查询pid命
名
空间
,仅在没有指定--pid参数时有效
2. 通过查询进程的cgroup
2. 通过查询进程的cgroup
3. 使用docker top <container-id>匹配
*/
*/
type
FindCIDMethod
string
type
FindCIDMethod
string
...
@@ -26,6 +28,7 @@ type FindCIDMethod string
...
@@ -26,6 +28,7 @@ type FindCIDMethod string
const
(
const
(
ByCgroup
FindCIDMethod
=
"byCGroup"
ByCgroup
FindCIDMethod
=
"byCGroup"
ByPidNS
FindCIDMethod
=
"byPidNS"
ByPidNS
FindCIDMethod
=
"byPidNS"
ByTop
FindCIDMethod
=
"byTop"
)
)
var
(
var
(
...
@@ -38,17 +41,84 @@ type ContainersInfo struct {
...
@@ -38,17 +41,84 @@ type ContainersInfo struct {
time
time
.
Time
// 记录写入Info的时间
time
time
.
Time
// 记录写入Info的时间
inspectInfo
map
[
string
]
container
.
InspectResponse
inspectInfo
map
[
string
]
container
.
InspectResponse
listInfo
map
[
string
]
container
.
Summary
listInfo
map
[
string
]
container
.
Summary
topInfo
map
[
string
]
container
.
TopResponse
}
// ContainerPsInfo is the per-container process summary produced by
// ParsePsInfo from a container's "top" response.
type ContainerPsInfo struct {
	// Pid is the process ID, parsed as an unsigned decimal number.
	Pid uint64
	// Ppid is the parent process ID, parsed as an unsigned decimal number.
	Ppid uint64
	// Uid is the value of the "uid" column, kept as text.
	Uid string
	// Cmd holds the command-related strings filled in by ParsePsInfo.
	Cmd []string
}
func
ParsePsInfo
(
topInfo
map
[
string
]
container
.
TopResponse
)
(
map
[
string
]
ContainerPsInfo
,
error
)
{
if
topInfo
==
nil
{
return
nil
,
errors
.
New
(
"topInfo is nil"
)
}
result
:=
make
(
map
[
string
]
ContainerPsInfo
)
indexMap
,
t
:=
make
(
map
[
string
]
int
),
0
for
cid
,
topResp
:=
range
topInfo
{
for
index
,
key
:=
range
topResp
.
Titles
{
switch
strings
.
TrimSpace
(
strings
.
ToLower
(
key
))
{
case
"pid"
:
indexMap
[
key
]
=
index
t
++
break
case
"ppid"
:
indexMap
[
key
]
=
index
t
++
break
case
"uid"
:
indexMap
[
key
]
=
index
t
++
break
case
"cmd"
:
indexMap
[
key
]
=
index
t
++
break
default
:
break
}
if
t
>=
4
{
break
}
}
item
:=
ContainerPsInfo
{}
if
v
,
ok
:=
indexMap
[
"pid"
];
ok
{
pid
,
err
:=
strconv
.
ParseUint
(
topResp
.
Processes
[
v
][
0
],
10
,
64
)
if
err
!=
nil
{
return
nil
,
err
}
item
.
Pid
=
pid
}
if
v
,
ok
:=
indexMap
[
"ppid"
];
ok
{
ppid
,
err
:=
strconv
.
ParseUint
(
topResp
.
Processes
[
v
][
0
],
10
,
64
)
if
err
!=
nil
{
return
nil
,
err
}
item
.
Ppid
=
ppid
}
if
v
,
ok
:=
indexMap
[
"uid"
];
ok
{
item
.
Uid
=
topResp
.
Processes
[
v
][
0
]
}
if
v
,
ok
:=
indexMap
[
"cmd"
];
ok
{
item
.
Cmd
=
topResp
.
Processes
[
v
]
}
result
[
cid
]
=
item
}
return
result
,
nil
}
}
func
(
info
*
ContainersInfo
)
Update
()
error
{
func
(
info
*
ContainersInfo
)
Update
()
error
{
info
.
lock
.
Lock
()
info
.
lock
.
Lock
()
defer
info
.
lock
.
Unlock
()
defer
info
.
lock
.
Unlock
()
i
,
s
,
err
:=
getContainerInfo
()
i
,
s
,
t
,
err
:=
getContainerInfo
()
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
info
.
inspectInfo
=
i
info
.
inspectInfo
=
i
info
.
listInfo
=
s
info
.
listInfo
=
s
info
.
topInfo
=
t
info
.
time
=
time
.
Now
()
info
.
time
=
time
.
Now
()
return
nil
return
nil
}
}
...
@@ -64,7 +134,7 @@ func init() {
...
@@ -64,7 +134,7 @@ func init() {
}
}
func
initContainerInfo
()
error
{
func
initContainerInfo
()
error
{
inspect
,
lists
,
err
:=
getContainerInfo
()
inspect
,
lists
,
tops
,
err
:=
getContainerInfo
()
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
...
@@ -73,6 +143,7 @@ func initContainerInfo() error {
...
@@ -73,6 +143,7 @@ func initContainerInfo() error {
time
:
time
.
Now
(),
time
:
time
.
Now
(),
inspectInfo
:
inspect
,
inspectInfo
:
inspect
,
listInfo
:
lists
,
listInfo
:
lists
,
topInfo
:
tops
,
}
}
return
nil
return
nil
}
}
...
@@ -225,27 +296,33 @@ func findContainerIdByNSBatch(pids []uint64) (map[uint64]string, error) {
...
@@ -225,27 +296,33 @@ func findContainerIdByNSBatch(pids []uint64) (map[uint64]string, error) {
}
}
// getContainerInfo 获取所有正在运行的docker容器的详细信息
// getContainerInfo 获取所有正在运行的docker容器的详细信息
func
getContainerInfo
()
(
map
[
string
]
container
.
InspectResponse
,
map
[
string
]
container
.
Summary
,
error
)
{
func
getContainerInfo
()
(
map
[
string
]
container
.
InspectResponse
,
map
[
string
]
container
.
Summary
,
map
[
string
]
container
.
TopResponse
,
error
)
{
cli
,
err
:=
client
.
NewClientWithOpts
(
client
.
FromEnv
,
client
.
WithAPIVersionNegotiation
())
cli
,
err
:=
client
.
NewClientWithOpts
(
client
.
FromEnv
,
client
.
WithAPIVersionNegotiation
())
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
nil
,
err
return
nil
,
nil
,
nil
,
err
}
}
defer
func
()
{
defer
func
()
{
_
=
cli
.
Close
()
_
=
cli
.
Close
()
}()
}()
containerSum
,
err
:=
cli
.
ContainerList
(
context
.
Background
(),
client
.
ContainerListOptions
{
All
:
false
})
containerSum
,
err
:=
cli
.
ContainerList
(
context
.
Background
(),
client
.
ContainerListOptions
{
All
:
false
})
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
nil
,
err
return
nil
,
nil
,
nil
,
err
}
}
inspects
:=
make
(
map
[
string
]
container
.
InspectResponse
)
inspects
:=
make
(
map
[
string
]
container
.
InspectResponse
)
lists
:=
make
(
map
[
string
]
container
.
Summary
)
lists
:=
make
(
map
[
string
]
container
.
Summary
)
tops
:=
make
(
map
[
string
]
container
.
TopResponse
)
for
_
,
c
:=
range
containerSum
{
for
_
,
c
:=
range
containerSum
{
inspect
,
innerErr
:=
cli
.
ContainerInspect
(
context
.
Background
(),
c
.
ID
)
inspect
,
innerErr
:=
cli
.
ContainerInspect
(
context
.
Background
(),
c
.
ID
)
if
innerErr
!=
nil
{
if
innerErr
!=
nil
{
return
nil
,
nil
,
innerErr
return
nil
,
nil
,
nil
,
innerErr
}
}
inspects
[
c
.
ID
]
=
inspect
inspects
[
c
.
ID
]
=
inspect
lists
[
c
.
ID
]
=
c
lists
[
c
.
ID
]
=
c
topInfo
,
innerErr
:=
cli
.
ContainerTop
(
context
.
Background
(),
c
.
ID
,
nil
)
if
innerErr
!=
nil
{
return
nil
,
nil
,
nil
,
innerErr
}
tops
[
c
.
ID
]
=
topInfo
}
}
return
inspects
,
lists
,
nil
return
inspects
,
lists
,
tops
,
nil
}
}
go.mod
View file @
b632c3a7
...
@@ -3,6 +3,8 @@ module get-container
...
@@ -3,6 +3,8 @@ module get-container
go 1.24.2
go 1.24.2
require (
require (
github.com/charmbracelet/bubbletea
v1.3.10
github.com/charmbracelet/lipgloss
v1.1.0
github.com/moby/moby/api
v1.52.0-beta.2
github.com/moby/moby/api
v1.52.0-beta.2
github.com/moby/moby/client
v0.1.0-beta.2
github.com/moby/moby/client
v0.1.0-beta.2
github.com/shirou/gopsutil/v4
v4.25.9
github.com/shirou/gopsutil/v4
v4.25.9
...
@@ -11,29 +13,45 @@ require (
...
@@ -11,29 +13,45 @@ require (
require (
require (
github.com/Azure/go-ansiterm
v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/Azure/go-ansiterm
v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/Microsoft/go-winio
v0.6.2 // indirect
github.com/Microsoft/go-winio
v0.6.2 // indirect
github.com/aymanbagabas/go-osc52/v2
v2.0.1 // indirect
github.com/charmbracelet/colorprofile
v0.2.3-0.20250311203215-f60798e515dc // indirect
github.com/charmbracelet/x/ansi
v0.10.1 // indirect
github.com/charmbracelet/x/cellbuf
v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
github.com/charmbracelet/x/term
v0.2.1 // indirect
github.com/containerd/errdefs
v1.0.0 // indirect
github.com/containerd/errdefs
v1.0.0 // indirect
github.com/containerd/errdefs/pkg
v0.3.0 // indirect
github.com/containerd/errdefs/pkg
v0.3.0 // indirect
github.com/distribution/reference
v0.6.0 // indirect
github.com/distribution/reference
v0.6.0 // indirect
github.com/docker/go-connections
v0.6.0 // indirect
github.com/docker/go-connections
v0.6.0 // indirect
github.com/docker/go-units
v0.5.0 // indirect
github.com/docker/go-units
v0.5.0 // indirect
github.com/ebitengine/purego
v0.9.0 // indirect
github.com/ebitengine/purego
v0.9.0 // indirect
github.com/erikgeiser/coninput
v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/felixge/httpsnoop
v1.0.4 // indirect
github.com/felixge/httpsnoop
v1.0.4 // indirect
github.com/go-logr/logr
v1.4.2 // indirect
github.com/go-logr/logr
v1.4.2 // indirect
github.com/go-logr/stdr
v1.2.2 // indirect
github.com/go-logr/stdr
v1.2.2 // indirect
github.com/go-ole/go-ole
v1.2.6 // indirect
github.com/go-ole/go-ole
v1.2.6 // indirect
github.com/lucasb-eyer/go-colorful
v1.2.0 // indirect
github.com/lufia/plan9stats
v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/lufia/plan9stats
v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/mattn/go-isatty
v0.0.20 // indirect
github.com/mattn/go-localereader
v0.0.1 // indirect
github.com/mattn/go-runewidth
v0.0.16 // indirect
github.com/moby/docker-image-spec
v1.3.1 // indirect
github.com/moby/docker-image-spec
v1.3.1 // indirect
github.com/moby/term
v0.5.2 // indirect
github.com/moby/term
v0.5.2 // indirect
github.com/muesli/ansi
v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader
v0.2.2 // indirect
github.com/muesli/termenv
v0.16.0 // indirect
github.com/opencontainers/go-digest
v1.0.0 // indirect
github.com/opencontainers/go-digest
v1.0.0 // indirect
github.com/opencontainers/image-spec
v1.1.1 // indirect
github.com/opencontainers/image-spec
v1.1.1 // indirect
github.com/power-devops/perfstat
v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/power-devops/perfstat
v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/rivo/uniseg
v0.4.7 // indirect
github.com/tklauser/go-sysconf
v0.3.15 // indirect
github.com/tklauser/go-sysconf
v0.3.15 // indirect
github.com/tklauser/numcpus
v0.10.0 // indirect
github.com/tklauser/numcpus
v0.10.0 // indirect
github.com/xo/terminfo
v0.0.0-20220910002029-abceb7e1c41e // indirect
github.com/yusufpapurcu/wmi
v1.2.4 // indirect
github.com/yusufpapurcu/wmi
v1.2.4 // indirect
go.opentelemetry.io/auto/sdk
v1.1.0 // indirect
go.opentelemetry.io/auto/sdk
v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp
v0.60.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp
v0.60.0 // indirect
go.opentelemetry.io/otel
v1.35.0 // indirect
go.opentelemetry.io/otel
v1.35.0 // indirect
go.opentelemetry.io/otel/metric
v1.35.0 // indirect
go.opentelemetry.io/otel/metric
v1.35.0 // indirect
go.opentelemetry.io/otel/trace
v1.35.0 // indirect
go.opentelemetry.io/otel/trace
v1.35.0 // indirect
golang.org/x/sys
v0.35.0 // indirect
golang.org/x/sys
v0.36.0 // indirect
golang.org/x/text
v0.3.8 // indirect
)
)
gpu/hy-smi.go
View file @
b632c3a7
...
@@ -5,17 +5,409 @@ import (
...
@@ -5,17 +5,409 @@ import (
"get-container/utils"
"get-container/utils"
"os/exec"
"os/exec"
"regexp"
"regexp"
"sort"
"strconv"
"strconv"
"strings"
"strings"
)
)
const (
	// DCUBinaryFile is the name of the command-line tool this package runs to
	// query DCU information.
	DCUBinaryFile = "hy-smi"

	// Header texts of the per-process table.
	// NOTE(review): presumably the column titles printed by
	// `hy-smi --showpids` — confirm against parseDCUPidInfo.
	PIDHeader             = "PID"
	PASIDHeader           = "PASID"
	HCUNodeHeader         = "HCU Node(Include CPU sort)"
	HCUIndexHeader        = "HCU Index"
	GPUIDHeader           = "GPUID"
	PCIBusHeader          = "PCI BUS"
	VRamUsedHeader        = "VRAM USED(MiB)"
	VRamUsedPercentHeader = "VRAM USED(%)"
	SDMAUsedHeader        = "SDMA USED"
)
// Regular expressions used to pick apart hy-smi output. They are compiled once
// at package initialisation.
var (
	// ReEmptyLine matches a line that is empty or whitespace only.
	ReEmptyLine = regexp.MustCompile(`^\s*$`)
	// ReUselessLine matches the "=====" separator and banner lines.
	ReUselessLine = regexp.MustCompile(`^=[ =a-zA-Z0-9]*=$`)
	// ReInfoHeader matches "HCU[n] : <entry>"; group 1 is n, group 2 the entry.
	ReInfoHeader = regexp.MustCompile(`^HCU\[(\d+)]\s+:\s+(.*)$`)

	// The expressions below each match one "name: value" entry. They use the
	// multi-line, case-insensitive flags so they can be applied to a block of
	// several entries at once.

	// ReDriId captures the device ID. Hexadecimal digits a-f are accepted so
	// that IDs such as 0x66a1 match as well as purely numeric ones.
	ReDriId        = regexp.MustCompile(`(?mi)^Device\s*ID\s*:\s*([x0-9a-fA-F]*)$`)
	ReVBIOSVersion = regexp.MustCompile(`(?mi)^VBIOS\s*version\s*:\s*([0-9a-zA-Z.]*)$`)

	// Temperature sensors; group 1 is the reading.
	ReTempEdge     = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*edge\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	ReTempJunction = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*junction\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	ReTempMem      = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*mem\)\s*\(C\)\s*:\s*([0-9.]*)$`)
	ReTempCore     = regexp.MustCompile(`(?mi)^Temperature\s*\(Sensor\s*core\)\s*\(C\)\s*:\s*([0-9.]*)$`)

	// Clock entries; group 1 is the level, group 2 the parenthesised frequency.
	ReFClk   = regexp.MustCompile(`(?mi)^fclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	ReMClk   = regexp.MustCompile(`(?mi)^mclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	ReSClk   = regexp.MustCompile(`(?mi)^sclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	ReSOCClk = regexp.MustCompile(`(?mi)^socclk\s*clock\s*level\s*:\s*([0-9]*)\s*\(([0-9a-zA-Z]*)\)$`)
	// RePCIClk has four groups: level, then the three values inside the
	// parentheses (e.g. "32.0GT/s", "x16", "1100Mhz").
	RePCIClk = regexp.MustCompile(`(?mi)^pcie\s*clock\s*level\s*([0-9]*)\s*\(([0-9.a-zA-Z/]*)\s*,\s*([x0-9]*)\s([0-9a-zA-Z]*)\)$`)

	RePreLevel = regexp.MustCompile(`(?mi)^Performance\s*Level\s*:\s*([0-9a-zA-Z]*)$`)
	// Power and voltage entries: group 1 is the parenthesised unit, group 2
	// the value.
	ReMaxPwr = regexp.MustCompile(`(?mi)^Max\s*Graphics\s*Package\s*Power\s*\((.*)\)\s*:\s*([0-9.]*)$`)
	ReAvgPwr = regexp.MustCompile(`(?mi)^Average\s*Graphics\s*Package\s*Power\s*\((.*)\)\s*:\s*([0-9.]*)$`)

	ReHCUUsage     = regexp.MustCompile(`(?mi)^HCU\s*use\s*\(.*\)\s*:\s*([0-9.]*)$`)
	ReHCUMemUsage  = regexp.MustCompile(`(?mi)^HCU\s*memory\s*use\s*\(.*\)\s*:\s*([0-9.]*)$`)
	ReHCUMemVendor = regexp.MustCompile(`(?mi)^HCU\s*Memory\s*Vendor\s*:\s*(.*)$`)
	// RePCIeRelay matches the "PCIe Replay Count" entry.
	RePCIeRelay = regexp.MustCompile(`(?mi)^PCIe\s*Replay\s*Count\s*:\s*([0-9]*)$`)
	ReSerialNum = regexp.MustCompile(`(?mi)^Serial\s*Number\s*:\s*([0-9a-zA-Z]*)$`)
	ReVoltage   = regexp.MustCompile(`(?mi)^Voltage\s*\((.*)\)\s*:\s*([0-9.]*)$`)
	RePCIBus    = regexp.MustCompile(`(?mi)^PCI\s*Bus\s*:\s*([0-9a-zA-Z.:]*)$`)

	// Firmware versions; group 1 is the version text.
	ReMECFWVersion   = regexp.MustCompile(`(?mi)^MEC\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReMEC2FWVersion  = regexp.MustCompile(`(?mi)^MEC2\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReRLCFWVersion   = regexp.MustCompile(`(?mi)^RLC\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReSDMAFWVersion  = regexp.MustCompile(`(?mi)^SDMA\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReSDMA2FWVersion = regexp.MustCompile(`(?mi)^SDMA2\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)
	ReSMCFWVersion   = regexp.MustCompile(`(?mi)^SMC\s*Firmware\s*Version\s*:\s*([0-9.]*)$`)

	// ReCardSerial matches the "Card Series" entry.
	ReCardSerial = regexp.MustCompile(`(?mi)^Card\s*Series\s*:\s*(.*)$`)
	ReCardVendor = regexp.MustCompile(`(?mi)^Card\s*Vendor\s*:\s*(.*)$`)
)
type
HYVersionInfo
struct
{
type
HYVersionInfo
struct
{
SMIVersion
string
SMIVersion
string
// --version
LibVersion
string
DriverVersion
string
// --showdriverversion
DriverVersion
string
}
func
GetHYVersionInfo
()
(
*
HYVersionInfo
,
error
)
{
versionBytes
,
err
:=
exec
.
Command
(
DCUBinaryFile
,
"--version"
)
.
Output
()
if
err
!=
nil
{
return
nil
,
err
}
driBytes
,
err
:=
exec
.
Command
(
DCUBinaryFile
,
"--showdriverversion"
)
.
Output
()
if
err
!=
nil
{
return
nil
,
err
}
return
&
HYVersionInfo
{
SMIVersion
:
strings
.
Trim
(
strings
.
TrimSpace
(
string
(
versionBytes
)),
"
\n
"
),
DriverVersion
:
strings
.
Trim
(
strings
.
TrimSpace
(
string
(
driBytes
)),
"
\n
"
),
},
nil
}
}
type
DCUInfo
struct
{
type
DCUInfo
struct
{
Id
int
// id
Name
string
// DCU名称
PerformanceLevel
string
// 性能等级
FanSpeed
float32
// 风扇转速
Temperature
float32
// 平均温度
PwrUsage
int16
PwrCapacity
int16
BusId
string
MemTotal
int32
MemUsed
int32
}
// ClockInfo describes one clock domain as reported by hy-smi.
type ClockInfo struct {
	// Level is the clock level.
	Level int
	// Freq is the frequency text, e.g. "1500Mhz".
	Freq string
}
// PcieClockInfo describes the PCIe clock entry reported by hy-smi.
type PcieClockInfo struct {
	// Level is the clock level.
	Level int
	// Freq is the frequency text.
	Freq string
	// BandWidth is the bandwidth text.
	BandWidth string
	// Times is the multiplier text.
	Times string
}
// SMIAllOutput hy-smi -a输出的信息,列出了DCU全面的信息
type
SMIAllOutput
struct
{
Id
int
DeviceId
string
VBIOSVersion
string
TempEdge
float32
TempJunction
float32
TempMem
float32
TempCores
float32
FClock
*
ClockInfo
MClock
*
ClockInfo
SClock
*
ClockInfo
SOCClock
*
ClockInfo
PCIEClock
*
PcieClockInfo
PerLevel
string
MaxPwr
float32
AvgPwr
float32
HCUUsage
float32
HCUMemUsage
float32
HCUMemVendor
string
PCIERelayCount
int
SerialNumber
string
Voltage
float32
PCIBus
string
MECFWVersion
string
MEC2FWVersion
string
RLCFWVersion
string
SDMAVersion
string
SDMA2Version
string
SMCVersion
string
CardSeries
string
CardVendor
string
}
func
GetSMIAllOutput
()
([]
*
SMIAllOutput
,
error
)
{
b
,
err
:=
exec
.
Command
(
DCUBinaryFile
,
"-a"
)
.
Output
()
if
err
!=
nil
{
return
nil
,
err
}
lines
:=
strings
.
Split
(
strings
.
Trim
(
strings
.
TrimSpace
(
string
(
b
)),
"
\n
"
),
"
\n
"
)
info
:=
make
(
map
[
int
][]
string
)
for
_
,
line
:=
range
lines
{
if
ReUselessLine
.
MatchString
(
line
)
||
ReEmptyLine
.
MatchString
(
line
)
{
continue
}
if
ReInfoHeader
.
MatchString
(
line
)
{
fields
:=
ReInfoHeader
.
FindStringSubmatch
(
strings
.
TrimSpace
(
strings
.
ReplaceAll
(
line
,
"
\t
"
,
" "
)))
if
len
(
fields
)
<
2
{
continue
}
id
,
innerErr
:=
strconv
.
Atoi
(
fields
[
1
])
if
innerErr
!=
nil
{
return
nil
,
innerErr
}
if
v
,
ok
:=
info
[
id
];
!
ok
{
info
[
id
]
=
make
([]
string
,
0
)
info
[
id
]
=
append
(
v
,
fields
[
2
])
}
else
{
info
[
id
]
=
append
(
v
,
fields
[
2
])
}
}
}
result
:=
make
([]
*
SMIAllOutput
,
0
)
for
k
,
v
:=
range
info
{
item
,
innerErr
:=
parseSMIAllOutput
(
k
,
strings
.
Join
(
v
,
"
\n
"
))
if
innerErr
!=
nil
{
return
nil
,
innerErr
}
if
item
!=
nil
{
result
=
append
(
result
,
item
)
}
}
// 按照id进行排序
sort
.
Slice
(
result
,
func
(
i
,
j
int
)
bool
{
return
result
[
i
]
.
Id
<
result
[
j
]
.
Id
})
return
result
,
nil
}
func
parseSMIAllOutput
(
id
int
,
str
string
)
(
*
SMIAllOutput
,
error
)
{
if
len
(
strings
.
TrimSpace
(
str
))
==
0
{
return
nil
,
nil
}
result
:=
SMIAllOutput
{}
result
.
Id
=
id
if
s
:=
regMatch
(
ReDriId
,
str
,
1
);
s
!=
nil
{
result
.
DeviceId
=
s
[
0
]
}
if
s
:=
regMatch
(
ReVBIOSVersion
,
str
,
1
);
s
!=
nil
{
result
.
VBIOSVersion
=
s
[
0
]
}
if
s
:=
regMatch
(
ReTempEdge
,
str
,
1
);
s
!=
nil
{
if
t
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
);
err
==
nil
{
result
.
TempEdge
=
float32
(
t
)
}
}
if
s
:=
regMatch
(
ReTempJunction
,
str
,
1
);
s
!=
nil
{
if
t
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
);
err
==
nil
{
result
.
TempJunction
=
float32
(
t
)
}
}
if
s
:=
regMatch
(
ReTempMem
,
str
,
1
);
s
!=
nil
{
if
t
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
);
err
==
nil
{
result
.
TempMem
=
float32
(
t
)
}
}
if
s
:=
regMatch
(
ReTempCore
,
str
,
1
);
s
!=
nil
{
if
t
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
);
err
==
nil
{
result
.
TempCores
=
float32
(
t
)
}
}
if
s
:=
regMatch
(
ReFClk
,
str
,
1
,
2
);
s
!=
nil
{
c
:=
ClockInfo
{}
level
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
c
.
Level
=
level
}
c
.
Freq
=
strings
.
TrimSpace
(
s
[
1
])
result
.
FClock
=
&
c
}
if
s
:=
regMatch
(
ReMClk
,
str
,
1
,
2
);
s
!=
nil
{
c
:=
ClockInfo
{}
level
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
c
.
Level
=
level
}
c
.
Freq
=
strings
.
TrimSpace
(
s
[
1
])
result
.
MClock
=
&
c
}
if
s
:=
regMatch
(
ReSClk
,
str
,
1
,
2
);
s
!=
nil
{
c
:=
ClockInfo
{}
level
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
c
.
Level
=
level
}
c
.
Freq
=
strings
.
TrimSpace
(
s
[
1
])
result
.
SClock
=
&
c
}
if
s
:=
regMatch
(
ReSOCClk
,
str
,
1
,
2
);
s
!=
nil
{
c
:=
ClockInfo
{}
level
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
c
.
Level
=
level
}
c
.
Freq
=
strings
.
TrimSpace
(
s
[
1
])
result
.
SOCClock
=
&
c
}
if
s
:=
regMatch
(
RePCIClk
,
str
,
1
,
2
);
s
!=
nil
{
c
:=
ClockInfo
{}
level
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
c
.
Level
=
level
}
c
.
Freq
=
strings
.
TrimSpace
(
s
[
1
])
result
.
SOCClock
=
&
c
}
if
s
:=
regMatch
(
RePCIClk
,
str
,
1
,
2
,
3
,
4
);
s
!=
nil
{
c
:=
PcieClockInfo
{}
level
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
c
.
Level
=
level
}
c
.
BandWidth
=
strings
.
TrimSpace
(
s
[
1
])
c
.
Times
=
strings
.
TrimSpace
(
s
[
2
])
c
.
Freq
=
strings
.
TrimSpace
(
s
[
3
])
result
.
PCIEClock
=
&
c
}
if
s
:=
regMatch
(
RePreLevel
,
str
,
1
);
s
!=
nil
{
result
.
PerLevel
=
s
[
0
]
}
if
s
:=
regMatch
(
ReMaxPwr
,
str
,
2
);
s
!=
nil
{
p
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
)
if
err
==
nil
{
result
.
MaxPwr
=
float32
(
p
)
}
}
if
s
:=
regMatch
(
ReAvgPwr
,
str
,
2
);
s
!=
nil
{
p
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
)
if
err
==
nil
{
result
.
AvgPwr
=
float32
(
p
)
}
}
if
s
:=
regMatch
(
ReHCUUsage
,
str
,
1
);
s
!=
nil
{
p
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
)
if
err
==
nil
{
result
.
HCUUsage
=
float32
(
p
)
}
}
if
s
:=
regMatch
(
ReHCUMemUsage
,
str
,
1
);
s
!=
nil
{
p
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
)
if
err
==
nil
{
result
.
HCUMemUsage
=
float32
(
p
)
}
}
if
s
:=
regMatch
(
ReHCUMemVendor
,
str
,
1
);
s
!=
nil
{
result
.
HCUMemVendor
=
s
[
0
]
}
if
s
:=
regMatch
(
RePCIeRelay
,
str
,
1
);
s
!=
nil
{
i
,
err
:=
strconv
.
Atoi
(
strings
.
TrimSpace
(
s
[
0
]))
if
err
==
nil
{
result
.
PCIERelayCount
=
i
}
}
if
s
:=
regMatch
(
ReSerialNum
,
str
,
1
);
s
!=
nil
{
result
.
SerialNumber
=
s
[
0
]
}
if
s
:=
regMatch
(
ReVoltage
,
str
,
2
);
s
!=
nil
{
p
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSpace
(
s
[
0
]),
32
)
if
err
==
nil
{
result
.
Voltage
=
float32
(
p
)
}
}
if
s
:=
regMatch
(
RePCIBus
,
str
,
2
);
s
!=
nil
{
result
.
PCIBus
=
s
[
0
]
}
if
s
:=
regMatch
(
ReMECFWVersion
,
str
,
1
);
s
!=
nil
{
result
.
MECFWVersion
=
s
[
0
]
}
if
s
:=
regMatch
(
ReMEC2FWVersion
,
str
,
1
);
s
!=
nil
{
result
.
MEC2FWVersion
=
s
[
0
]
}
if
s
:=
regMatch
(
ReRLCFWVersion
,
str
,
1
);
s
!=
nil
{
result
.
RLCFWVersion
=
s
[
0
]
}
if
s
:=
regMatch
(
ReSDMAFWVersion
,
str
,
1
);
s
!=
nil
{
result
.
SDMAVersion
=
s
[
0
]
}
if
s
:=
regMatch
(
ReSDMA2FWVersion
,
str
,
1
);
s
!=
nil
{
result
.
SDMA2Version
=
s
[
0
]
}
if
s
:=
regMatch
(
ReSMCFWVersion
,
str
,
1
);
s
!=
nil
{
result
.
SMCVersion
=
s
[
0
]
}
if
s
:=
regMatch
(
ReCardSerial
,
str
,
1
);
s
!=
nil
{
result
.
CardSeries
=
s
[
0
]
}
if
s
:=
regMatch
(
ReCardVendor
,
str
,
1
);
s
!=
nil
{
result
.
CardVendor
=
s
[
0
]
}
return
&
result
,
nil
}
// DCURunningInfo is one row of DCU runtime status, as parsed by
// parseRunningInfo from the table printed by hy-smi.
type DCURunningInfo struct {
	// Id is the device index from the first column.
	Id int
	// Temp is the temperature reading with its "C" suffix removed.
	Temp float32
	// AvgPower is the average power reading with its "W" suffix removed.
	AvgPower float32
	// PerformanceLevel is the performance level column, kept as text.
	PerformanceLevel string
	// MemPerc is the VRAM usage percentage.
	MemPerc float32
	// HCUPerc is the HCU usage percentage.
	HCUPerc float32
}
// GetRunningInfo 获取DCU运行相关信息
func
GetRunningInfo
()
([]
DCURunningInfo
,
error
)
{
output
,
err
:=
exec
.
Command
(
DCUBinaryFile
)
.
Output
()
if
err
!=
nil
{
return
nil
,
err
}
return
parseRunningInfo
(
string
(
output
))
}
func
parseRunningInfo
(
info
string
)
([]
DCURunningInfo
,
error
)
{
lines
:=
strings
.
Split
(
strings
.
Trim
(
strings
.
TrimSpace
(
info
),
"
\n
"
),
"
\n
"
)
result
:=
make
([]
DCURunningInfo
,
0
)
for
_
,
line
:=
range
lines
{
if
ReUselessLine
.
MatchString
(
line
)
||
ReEmptyLine
.
MatchString
(
line
)
{
continue
}
fields
:=
strings
.
Fields
(
strings
.
TrimSpace
(
line
))
if
len
(
fields
)
<
8
{
continue
}
item
:=
DCURunningInfo
{}
id
,
err
:=
strconv
.
Atoi
(
fields
[
0
])
if
err
!=
nil
{
continue
}
item
.
Id
=
id
temp
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSuffix
(
strings
.
ToLower
(
fields
[
1
]),
"c"
),
32
)
if
err
!=
nil
{
return
nil
,
err
}
item
.
Temp
=
float32
(
temp
)
avgPwr
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSuffix
(
strings
.
ToLower
(
fields
[
2
]),
"w"
),
32
)
if
err
!=
nil
{
return
nil
,
err
}
item
.
AvgPower
=
float32
(
avgPwr
)
item
.
PerformanceLevel
=
fields
[
3
]
vram
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSuffix
(
fields
[
5
],
"%"
),
32
)
if
err
!=
nil
{
return
nil
,
err
}
item
.
MemPerc
=
float32
(
vram
)
utl
,
err
:=
strconv
.
ParseFloat
(
strings
.
TrimSuffix
(
fields
[
6
],
"%"
),
32
)
if
err
!=
nil
{
return
nil
,
err
}
item
.
HCUPerc
=
float32
(
utl
)
result
=
append
(
result
,
item
)
}
return
result
,
nil
}
}
type
DCUPidInfo
struct
{
type
DCUPidInfo
struct
{
...
@@ -30,26 +422,9 @@ type DCUPidInfo struct {
...
@@ -30,26 +422,9 @@ type DCUPidInfo struct {
SDMAUsed
int
SDMAUsed
int
}
}
var
(
ReEmptyLine
=
regexp
.
MustCompile
(
`^\s*$`
)
ReUselessLine
=
regexp
.
MustCompile
(
`^=[ =a-zA-Z0-9]*=$`
)
)
const
(
PIDHeader
=
"PID"
PASIDHeader
=
"PASID"
HCUNodeHeader
=
"HCU Node(Include CPU sort)"
HCUIndexHeader
=
"HCU Index"
GPUIDHeader
=
"GPUID"
PCIBusHeader
=
"PCI BUS"
VRamUsedHeader
=
"VRAM USED(MiB)"
VRamUsedPercentHeader
=
"VRAM USED(%)"
SDMAUsedHeader
=
"SDMA USED"
)
// GetDCUPidInfo 获取Pid相关信息
// GetDCUPidInfo 获取Pid相关信息
func
GetDCUPidInfo
()
([]
DCUPidInfo
,
error
)
{
func
GetDCUPidInfo
()
([]
DCUPidInfo
,
error
)
{
output
,
err
:=
exec
.
Command
(
"hy-smi"
,
"--showpids"
)
.
Output
()
output
,
err
:=
exec
.
Command
(
DCUBinaryFile
,
"--showpids"
)
.
Output
()
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
...
@@ -146,3 +521,20 @@ func parseDCUPidInfo(s string) ([]DCUPidInfo, error) {
...
@@ -146,3 +521,20 @@ func parseDCUPidInfo(s string) ([]DCUPidInfo, error) {
}
}
return
result
,
nil
return
result
,
nil
}
}
// regMatch applies reg to s and returns the requested submatches.
//
// index lists the positions to pick from the FindStringSubmatch result (0 is
// the whole match, 1 the first capture group, and so on); the returned slice
// holds them in the order requested. It returns nil when reg does not match s
// or when any requested position is negative or beyond the available
// submatches. With no positions requested and a successful match, the result
// is an empty, non-nil slice.
func regMatch(reg *regexp.Regexp, s string, index ...int) []string {
	groups := reg.FindStringSubmatch(s)
	if groups == nil {
		return nil
	}
	picked := make([]string, 0, len(index))
	for _, want := range index {
		if want < 0 || want >= len(groups) {
			return nil
		}
		picked = append(picked, groups[want])
	}
	return picked
}
gpu/regexp_test.go
View file @
b632c3a7
...
@@ -2,7 +2,12 @@ package gpu
...
@@ -2,7 +2,12 @@ package gpu
import
(
import
(
"encoding/json"
"encoding/json"
"os"
"sort"
"strconv"
"strings"
"testing"
"testing"
"time"
)
)
const
(
const
(
...
@@ -206,3 +211,80 @@ func TestParseDCUPidInfo(t *testing.T) {
...
@@ -206,3 +211,80 @@ func TestParseDCUPidInfo(t *testing.T) {
t
.
Logf
(
"%+v
\n
"
,
info
)
t
.
Logf
(
"%+v
\n
"
,
info
)
}
}
}
}
// DCUPidInfoStr is a sample of the per-device status table (HCU, Temp, AvgPwr,
// Perf, PwrCap, VRAM%, HCU%, Mode columns) that TestParseRunningInfo feeds to
// parseRunningInfo. Despite its name it is not per-process (PID) output.
const
DCUPidInfoStr
=
`
============================ System Management Interface =============================
======================================================================================
HCU Temp AvgPwr Perf PwrCap VRAM% HCU% Mode
0 34.0C 140.0W manual 800.0W 0% 0.0% Normal
1 35.0C 140.0W manual 800.0W 0% 0.0% Normal
2 35.0C 140.0W manual 800.0W 0% 0.0% Normal
3 33.0C 140.0W manual 800.0W 0% 0.0% Normal
4 36.0C 140.0W manual 800.0W 0% 0.0% Normal
5 36.0C 140.0W manual 800.0W 0% 0.0% Normal
6 34.0C 140.0W manual 800.0W 0% 0.0% Normal
7 34.0C 140.0W manual 800.0W 0% 0.0% Normal
======================================================================================
=================================== End of SMI Log ===================================
`
func
TestParseRunningInfo
(
t
*
testing
.
T
)
{
i
,
e
:=
parseRunningInfo
(
DCUPidInfoStr
)
if
e
!=
nil
{
t
.
Fatal
(
e
)
}
for
_
,
info
:=
range
i
{
t
.
Logf
(
"%+v
\n
"
,
info
)
}
}
func
TestAbc
(
t
*
testing
.
T
)
{
b
,
e
:=
os
.
ReadFile
(
"../test-data/hy.log"
)
if
e
!=
nil
{
t
.
Error
(
e
)
}
timeStart
:=
time
.
Now
()
lines
:=
strings
.
Split
(
strings
.
Trim
(
strings
.
TrimSpace
(
string
(
b
)),
"
\n
"
),
"
\n
"
)
info
:=
make
(
map
[
int
][]
string
)
for
_
,
line
:=
range
lines
{
if
ReUselessLine
.
MatchString
(
line
)
||
ReEmptyLine
.
MatchString
(
line
)
{
continue
}
if
ReInfoHeader
.
MatchString
(
line
)
{
fields
:=
ReInfoHeader
.
FindStringSubmatch
(
strings
.
TrimSpace
(
strings
.
ReplaceAll
(
line
,
"
\t
"
,
" "
)))
if
fields
==
nil
||
len
(
fields
)
<=
2
{
continue
}
id
,
innerErr
:=
strconv
.
Atoi
(
fields
[
1
])
if
innerErr
!=
nil
{
t
.
Error
(
innerErr
)
}
if
v
,
ok
:=
info
[
id
];
!
ok
{
info
[
id
]
=
make
([]
string
,
0
)
info
[
id
]
=
append
(
v
,
fields
[
2
])
}
else
{
info
[
id
]
=
append
(
v
,
fields
[
2
])
}
}
}
result
:=
make
([]
*
SMIAllOutput
,
0
)
for
k
,
v
:=
range
info
{
item
,
innerErr
:=
parseSMIAllOutput
(
k
,
strings
.
Join
(
v
,
"
\n
"
))
if
innerErr
!=
nil
{
t
.
Error
(
innerErr
)
}
if
item
!=
nil
{
result
=
append
(
result
,
item
)
}
}
sort
.
Slice
(
result
,
func
(
i
,
j
int
)
bool
{
return
result
[
i
]
.
Id
<
result
[
j
]
.
Id
})
end
:=
time
.
Now
()
t
.
Log
(
end
.
Sub
(
timeStart
)
.
Seconds
())
for
_
,
i
:=
range
result
{
t
.
Logf
(
"%+v"
,
*
i
)
}
}
test-data/hy.log
0 → 100644
View file @
b632c3a7
============================ System Management Interface =============================
======================================================================================
Driver Version: 6.3.2-V1.7.4
======================================================================================
======================================================================================
HCU[0] : Device ID: 0x6320
HCU[1] : Device ID: 0x6320
HCU[2] : Device ID: 0x6320
HCU[3] : Device ID: 0x6320
HCU[4] : Device ID: 0x6320
HCU[5] : Device ID: 0x6320
HCU[6] : Device ID: 0x6320
HCU[7] : Device ID: 0x6320
======================================================================================
======================================================================================
HCU[0] : VBIOS version: 5.717.002200A.685184
HCU[1] : VBIOS version: 5.717.002200A.685184
HCU[2] : VBIOS version: 5.717.002200A.685184
HCU[3] : VBIOS version: 5.717.002200A.685184
HCU[4] : VBIOS version: 5.717.002200A.685184
HCU[5] : VBIOS version: 5.717.002200A.685184
HCU[6] : VBIOS version: 5.717.002200A.685184
HCU[7] : VBIOS version: 5.717.002200A.685184
======================================================================================
======================================================================================
HCU[0] : Temperature (Sensor edge) (C): 36.0
HCU[0] : Temperature (Sensor junction) (C): 40.0
HCU[0] : Temperature (Sensor mem) (C): 31.0
HCU[0] : Temperature (Sensor core) (C): 34.0
HCU[1] : Temperature (Sensor edge) (C): 37.0
HCU[1] : Temperature (Sensor junction) (C): 40.0
HCU[1] : Temperature (Sensor mem) (C): 30.0
HCU[1] : Temperature (Sensor core) (C): 35.0
HCU[2] : Temperature (Sensor edge) (C): 37.0
HCU[2] : Temperature (Sensor junction) (C): 40.0
HCU[2] : Temperature (Sensor mem) (C): 29.0
HCU[2] : Temperature (Sensor core) (C): 35.0
HCU[3] : Temperature (Sensor edge) (C): 36.0
HCU[3] : Temperature (Sensor junction) (C): 39.0
HCU[3] : Temperature (Sensor mem) (C): 28.0
HCU[3] : Temperature (Sensor core) (C): 33.0
HCU[4] : Temperature (Sensor edge) (C): 38.0
HCU[4] : Temperature (Sensor junction) (C): 41.0
HCU[4] : Temperature (Sensor mem) (C): 32.0
HCU[4] : Temperature (Sensor core) (C): 36.0
HCU[5] : Temperature (Sensor edge) (C): 39.0
HCU[5] : Temperature (Sensor junction) (C): 42.0
HCU[5] : Temperature (Sensor mem) (C): 31.0
HCU[5] : Temperature (Sensor core) (C): 36.0
HCU[6] : Temperature (Sensor edge) (C): 37.0
HCU[6] : Temperature (Sensor junction) (C): 40.0
HCU[6] : Temperature (Sensor mem) (C): 29.0
HCU[6] : Temperature (Sensor core) (C): 34.0
HCU[7] : Temperature (Sensor edge) (C): 35.0
HCU[7] : Temperature (Sensor junction) (C): 40.0
HCU[7] : Temperature (Sensor mem) (C): 29.0
HCU[7] : Temperature (Sensor core) (C): 34.0
======================================================================================
======================================================================================
HCU[0] : fclk clock level: 0 (1500Mhz)
HCU[0] : mclk clock level: 0 (1500Mhz)
HCU[0] : sclk clock level: 9 (1500Mhz)
HCU[0] : socclk clock level: 0 (1100Mhz)
HCU[0] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[1] : fclk clock level: 0 (1500Mhz)
HCU[1] : mclk clock level: 0 (1500Mhz)
HCU[1] : sclk clock level: 9 (1500Mhz)
HCU[1] : socclk clock level: 0 (1100Mhz)
HCU[1] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[2] : fclk clock level: 0 (1500Mhz)
HCU[2] : mclk clock level: 0 (1500Mhz)
HCU[2] : sclk clock level: 9 (1500Mhz)
HCU[2] : socclk clock level: 0 (1100Mhz)
HCU[2] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[3] : fclk clock level: 0 (1500Mhz)
HCU[3] : mclk clock level: 0 (1500Mhz)
HCU[3] : sclk clock level: 9 (1500Mhz)
HCU[3] : socclk clock level: 0 (1100Mhz)
HCU[3] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[4] : fclk clock level: 0 (1500Mhz)
HCU[4] : mclk clock level: 0 (1500Mhz)
HCU[4] : sclk clock level: 9 (1500Mhz)
HCU[4] : socclk clock level: 0 (1100Mhz)
HCU[4] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[5] : fclk clock level: 0 (1500Mhz)
HCU[5] : mclk clock level: 0 (1500Mhz)
HCU[5] : sclk clock level: 9 (1500Mhz)
HCU[5] : socclk clock level: 0 (1100Mhz)
HCU[5] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[6] : fclk clock level: 0 (1500Mhz)
HCU[6] : mclk clock level: 0 (1500Mhz)
HCU[6] : sclk clock level: 9 (1500Mhz)
HCU[6] : socclk clock level: 0 (1100Mhz)
HCU[6] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
HCU[7] : fclk clock level: 0 (1500Mhz)
HCU[7] : mclk clock level: 0 (1500Mhz)
HCU[7] : sclk clock level: 9 (1500Mhz)
HCU[7] : socclk clock level: 0 (1100Mhz)
HCU[7] : pcie clock level 2 (32.0GT/s, x16 1100Mhz)
======================================================================================
======================================================================================
HCU[0] : Performance Level: manual
HCU[1] : Performance Level: manual
HCU[2] : Performance Level: manual
HCU[3] : Performance Level: manual
HCU[4] : Performance Level: manual
HCU[5] : Performance Level: manual
HCU[6] : Performance Level: manual
HCU[7] : Performance Level: manual
======================================================================================
======================================================================================
HCU[0] : Max Graphics Package Power (W): 800.0
HCU[1] : Max Graphics Package Power (W): 800.0
HCU[2] : Max Graphics Package Power (W): 800.0
HCU[3] : Max Graphics Package Power (W): 800.0
HCU[4] : Max Graphics Package Power (W): 800.0
HCU[5] : Max Graphics Package Power (W): 800.0
HCU[6] : Max Graphics Package Power (W): 800.0
HCU[7] : Max Graphics Package Power (W): 800.0
======================================================================================
======================================================================================
HCU[0] : Average Graphics Package Power (W): 140.0
HCU[1] : Average Graphics Package Power (W): 140.0
HCU[2] : Average Graphics Package Power (W): 140.0
HCU[3] : Average Graphics Package Power (W): 140.0
HCU[4] : Average Graphics Package Power (W): 140.0
HCU[5] : Average Graphics Package Power (W): 140.0
HCU[6] : Average Graphics Package Power (W): 140.0
HCU[7] : Average Graphics Package Power (W): 140.0
======================================================================================
======================================================================================
HCU[0] : Supported fclk frequencies on HCU 0
HCU[0] : 0: 1500Mhz *
HCU[0] :
HCU[0] : Supported mclk frequencies on HCU 0
HCU[0] : 0: 1500Mhz *
HCU[0] :
HCU[0] : Supported sclk frequencies on HCU 0
HCU[0] : 0: 300Mhz
HCU[0] : 1: 600Mhz
HCU[0] : 2: 800Mhz
HCU[0] : 3: 900Mhz
HCU[0] : 4: 1000Mhz
HCU[0] : 5: 1100Mhz
HCU[0] : 6: 1200Mhz
HCU[0] : 7: 1300Mhz
HCU[0] : 8: 1400Mhz
HCU[0] : 9: 1500Mhz *
HCU[0] : *: 1600Mhz
HCU[0] :
HCU[0] : Supported socclk frequencies on HCU 0
HCU[0] : 0: 1100Mhz *
HCU[0] :
HCU[0] : Supported pcie frequencies on HCU 0
HCU[0] : 0: 2.5GT/s, x16 300Mhz
HCU[0] : 1: 8.0GT/s, x16 625Mhz
HCU[0] : 2: 32.0GT/s, x16 1100Mhz *
HCU[0] :
HCU[1] : Supported fclk frequencies on HCU 1
HCU[1] : 0: 1500Mhz *
HCU[1] :
HCU[1] : Supported mclk frequencies on HCU 1
HCU[1] : 0: 1500Mhz *
HCU[1] :
HCU[1] : Supported sclk frequencies on HCU 1
HCU[1] : 0: 300Mhz
HCU[1] : 1: 600Mhz
HCU[1] : 2: 800Mhz
HCU[1] : 3: 900Mhz
HCU[1] : 4: 1000Mhz
HCU[1] : 5: 1100Mhz
HCU[1] : 6: 1200Mhz
HCU[1] : 7: 1300Mhz
HCU[1] : 8: 1400Mhz
HCU[1] : 9: 1500Mhz *
HCU[1] : *: 1600Mhz
HCU[1] :
HCU[1] : Supported socclk frequencies on HCU 1
HCU[1] : 0: 1100Mhz *
HCU[1] :
HCU[1] : Supported pcie frequencies on HCU 1
HCU[1] : 0: 2.5GT/s, x16 300Mhz
HCU[1] : 1: 8.0GT/s, x16 625Mhz
HCU[1] : 2: 32.0GT/s, x16 1100Mhz *
HCU[1] :
HCU[2] : Supported fclk frequencies on HCU 2
HCU[2] : 0: 1500Mhz *
HCU[2] :
HCU[2] : Supported mclk frequencies on HCU 2
HCU[2] : 0: 1500Mhz *
HCU[2] :
HCU[2] : Supported sclk frequencies on HCU 2
HCU[2] : 0: 300Mhz
HCU[2] : 1: 600Mhz
HCU[2] : 2: 800Mhz
HCU[2] : 3: 900Mhz
HCU[2] : 4: 1000Mhz
HCU[2] : 5: 1100Mhz
HCU[2] : 6: 1200Mhz
HCU[2] : 7: 1300Mhz
HCU[2] : 8: 1400Mhz
HCU[2] : 9: 1500Mhz *
HCU[2] : *: 1600Mhz
HCU[2] :
HCU[2] : Supported socclk frequencies on HCU 2
HCU[2] : 0: 1100Mhz *
HCU[2] :
HCU[2] : Supported pcie frequencies on HCU 2
HCU[2] : 0: 2.5GT/s, x16 300Mhz
HCU[2] : 1: 8.0GT/s, x16 625Mhz
HCU[2] : 2: 32.0GT/s, x16 1100Mhz *
HCU[2] :
HCU[3] : Supported fclk frequencies on HCU 3
HCU[3] : 0: 1500Mhz *
HCU[3] :
HCU[3] : Supported mclk frequencies on HCU 3
HCU[3] : 0: 1500Mhz *
HCU[3] :
HCU[3] : Supported sclk frequencies on HCU 3
HCU[3] : 0: 300Mhz
HCU[3] : 1: 600Mhz
HCU[3] : 2: 800Mhz
HCU[3] : 3: 900Mhz
HCU[3] : 4: 1000Mhz
HCU[3] : 5: 1100Mhz
HCU[3] : 6: 1200Mhz
HCU[3] : 7: 1300Mhz
HCU[3] : 8: 1400Mhz
HCU[3] : 9: 1500Mhz *
HCU[3] : *: 1600Mhz
HCU[3] :
HCU[3] : Supported socclk frequencies on HCU 3
HCU[3] : 0: 1100Mhz *
HCU[3] :
HCU[3] : Supported pcie frequencies on HCU 3
HCU[3] : 0: 2.5GT/s, x16 300Mhz
HCU[3] : 1: 8.0GT/s, x16 625Mhz
HCU[3] : 2: 32.0GT/s, x16 1100Mhz *
HCU[3] :
HCU[4] : Supported fclk frequencies on HCU 4
HCU[4] : 0: 1500Mhz *
HCU[4] :
HCU[4] : Supported mclk frequencies on HCU 4
HCU[4] : 0: 1500Mhz *
HCU[4] :
HCU[4] : Supported sclk frequencies on HCU 4
HCU[4] : 0: 300Mhz
HCU[4] : 1: 600Mhz
HCU[4] : 2: 800Mhz
HCU[4] : 3: 900Mhz
HCU[4] : 4: 1000Mhz
HCU[4] : 5: 1100Mhz
HCU[4] : 6: 1200Mhz
HCU[4] : 7: 1300Mhz
HCU[4] : 8: 1400Mhz
HCU[4] : 9: 1500Mhz *
HCU[4] : *: 1600Mhz
HCU[4] :
HCU[4] : Supported socclk frequencies on HCU 4
HCU[4] : 0: 1100Mhz *
HCU[4] :
HCU[4] : Supported pcie frequencies on HCU 4
HCU[4] : 0: 2.5GT/s, x16 300Mhz
HCU[4] : 1: 8.0GT/s, x16 625Mhz
HCU[4] : 2: 32.0GT/s, x16 1100Mhz *
HCU[4] :
HCU[5] : Supported fclk frequencies on HCU 5
HCU[5] : 0: 1500Mhz *
HCU[5] :
HCU[5] : Supported mclk frequencies on HCU 5
HCU[5] : 0: 1500Mhz *
HCU[5] :
HCU[5] : Supported sclk frequencies on HCU 5
HCU[5] : 0: 300Mhz
HCU[5] : 1: 600Mhz
HCU[5] : 2: 800Mhz
HCU[5] : 3: 900Mhz
HCU[5] : 4: 1000Mhz
HCU[5] : 5: 1100Mhz
HCU[5] : 6: 1200Mhz
HCU[5] : 7: 1300Mhz
HCU[5] : 8: 1400Mhz
HCU[5] : 9: 1500Mhz *
HCU[5] : *: 1600Mhz
HCU[5] :
HCU[5] : Supported socclk frequencies on HCU 5
HCU[5] : 0: 1100Mhz *
HCU[5] :
HCU[5] : Supported pcie frequencies on HCU 5
HCU[5] : 0: 2.5GT/s, x16 300Mhz
HCU[5] : 1: 8.0GT/s, x16 625Mhz
HCU[5] : 2: 32.0GT/s, x16 1100Mhz *
HCU[5] :
HCU[6] : Supported fclk frequencies on HCU 6
HCU[6] : 0: 1500Mhz *
HCU[6] :
HCU[6] : Supported mclk frequencies on HCU 6
HCU[6] : 0: 1500Mhz *
HCU[6] :
HCU[6] : Supported sclk frequencies on HCU 6
HCU[6] : 0: 300Mhz
HCU[6] : 1: 600Mhz
HCU[6] : 2: 800Mhz
HCU[6] : 3: 900Mhz
HCU[6] : 4: 1000Mhz
HCU[6] : 5: 1100Mhz
HCU[6] : 6: 1200Mhz
HCU[6] : 7: 1300Mhz
HCU[6] : 8: 1400Mhz
HCU[6] : 9: 1500Mhz *
HCU[6] : *: 1600Mhz
HCU[6] :
HCU[6] : Supported socclk frequencies on HCU 6
HCU[6] : 0: 1100Mhz *
HCU[6] :
HCU[6] : Supported pcie frequencies on HCU 6
HCU[6] : 0: 2.5GT/s, x16 300Mhz
HCU[6] : 1: 8.0GT/s, x16 625Mhz
HCU[6] : 2: 32.0GT/s, x16 1100Mhz *
HCU[6] :
HCU[7] : Supported fclk frequencies on HCU 7
HCU[7] : 0: 1500Mhz *
HCU[7] :
HCU[7] : Supported mclk frequencies on HCU 7
HCU[7] : 0: 1500Mhz *
HCU[7] :
HCU[7] : Supported sclk frequencies on HCU 7
HCU[7] : 0: 300Mhz
HCU[7] : 1: 600Mhz
HCU[7] : 2: 800Mhz
HCU[7] : 3: 900Mhz
HCU[7] : 4: 1000Mhz
HCU[7] : 5: 1100Mhz
HCU[7] : 6: 1200Mhz
HCU[7] : 7: 1300Mhz
HCU[7] : 8: 1400Mhz
HCU[7] : 9: 1500Mhz *
HCU[7] : *: 1600Mhz
HCU[7] :
HCU[7] : Supported socclk frequencies on HCU 7
HCU[7] : 0: 1100Mhz *
HCU[7] :
HCU[7] : Supported pcie frequencies on HCU 7
HCU[7] : 0: 2.5GT/s, x16 300Mhz
HCU[7] : 1: 8.0GT/s, x16 625Mhz
HCU[7] : 2: 32.0GT/s, x16 1100Mhz *
HCU[7] :
======================================================================================
======================================================================================
HCU[0] : HCU use (%): 0.0
HCU[1] : HCU use (%): 0.0
HCU[2] : HCU use (%): 0.0
HCU[3] : HCU use (%): 0.0
HCU[4] : HCU use (%): 0.0
HCU[5] : HCU use (%): 0.0
HCU[6] : HCU use (%): 0.0
HCU[7] : HCU use (%): 0.0
======================================================================================
======================================================================================
HCU[0] : HCU memory use (%): 0
HCU[1] : HCU memory use (%): 0
HCU[2] : HCU memory use (%): 0
HCU[3] : HCU memory use (%): 0
HCU[4] : HCU memory use (%): 0
HCU[5] : HCU memory use (%): 0
HCU[6] : HCU memory use (%): 0
HCU[7] : HCU memory use (%): 0
======================================================================================
======================================================================================
HCU[0] : HCU Memory Vendor: samsung
HCU[1] : HCU Memory Vendor: samsung
HCU[2] : HCU Memory Vendor: samsung
HCU[3] : HCU Memory Vendor: samsung
HCU[4] : HCU Memory Vendor: samsung
HCU[5] : HCU Memory Vendor: samsung
HCU[6] : HCU Memory Vendor: samsung
HCU[7] : HCU Memory Vendor: samsung
======================================================================================
======================================================================================
HCU[0] : PCIe Replay Count: 0
HCU[1] : PCIe Replay Count: 0
HCU[2] : PCIe Replay Count: 0
HCU[3] : PCIe Replay Count: 0
HCU[4] : PCIe Replay Count: 0
HCU[5] : PCIe Replay Count: 0
HCU[6] : PCIe Replay Count: 0
HCU[7] : PCIe Replay Count: 0
======================================================================================
======================================================================================
HCU[0] : Serial Number: T6V51408031001
HCU[1] : Serial Number: T6V51213060601
HCU[2] : Serial Number: T6V51609020301
HCU[3] : Serial Number: T6V51607010801
HCU[4] : Serial Number: T6V51409010501
HCU[5] : Serial Number: T6V51420070301
HCU[6] : Serial Number: T6V51401040101
HCU[7] : Serial Number: T6V51205080901
======================================================================================
======================================================================================
No KFD PIDs currently running!
======================================================================================
======================================================================================
HCU[0] : Voltage (mV): 956
HCU[1] : Voltage (mV): 956
HCU[2] : Voltage (mV): 956
HCU[3] : Voltage (mV): 956
HCU[4] : Voltage (mV): 956
HCU[5] : Voltage (mV): 956
HCU[6] : Voltage (mV): 956
HCU[7] : Voltage (mV): 956
======================================================================================
======================================================================================
HCU[0] : PCI Bus: 0000:9f:00.0
HCU[1] : PCI Bus: 0000:56:00.0
HCU[2] : PCI Bus: 0000:5d:00.0
HCU[3] : PCI Bus: 0000:05:00.0
HCU[4] : PCI Bus: 0000:e5:00.0
HCU[5] : PCI Bus: 0000:c1:00.0
HCU[6] : PCI Bus: 0000:ca:00.0
HCU[7] : PCI Bus: 0000:b1:00.0
======================================================================================
======================================================================================
HCU[0] : MEC Firmware Version: 42
HCU[0] : MEC2 Firmware Version: 42
HCU[0] : RLC Firmware Version: 1
HCU[0] : SDMA Firmware Version: 10
HCU[0] : SDMA2 Firmware Version: 10
HCU[0] : SMC Firmware Version: 00.00.32.01
HCU[1] : MEC Firmware Version: 42
HCU[1] : MEC2 Firmware Version: 42
HCU[1] : RLC Firmware Version: 1
HCU[1] : SDMA Firmware Version: 10
HCU[1] : SDMA2 Firmware Version: 10
HCU[1] : SMC Firmware Version: 00.00.32.01
HCU[2] : MEC Firmware Version: 42
HCU[2] : MEC2 Firmware Version: 42
HCU[2] : RLC Firmware Version: 1
HCU[2] : SDMA Firmware Version: 10
HCU[2] : SDMA2 Firmware Version: 10
HCU[2] : SMC Firmware Version: 00.00.32.01
HCU[3] : MEC Firmware Version: 42
HCU[3] : MEC2 Firmware Version: 42
HCU[3] : RLC Firmware Version: 1
HCU[3] : SDMA Firmware Version: 10
HCU[3] : SDMA2 Firmware Version: 10
HCU[3] : SMC Firmware Version: 00.00.32.01
HCU[4] : MEC Firmware Version: 42
HCU[4] : MEC2 Firmware Version: 42
HCU[4] : RLC Firmware Version: 1
HCU[4] : SDMA Firmware Version: 10
HCU[4] : SDMA2 Firmware Version: 10
HCU[4] : SMC Firmware Version: 00.00.32.01
HCU[5] : MEC Firmware Version: 42
HCU[5] : MEC2 Firmware Version: 42
HCU[5] : RLC Firmware Version: 1
HCU[5] : SDMA Firmware Version: 10
HCU[5] : SDMA2 Firmware Version: 10
HCU[5] : SMC Firmware Version: 00.00.32.01
HCU[6] : MEC Firmware Version: 42
HCU[6] : MEC2 Firmware Version: 42
HCU[6] : RLC Firmware Version: 1
HCU[6] : SDMA Firmware Version: 10
HCU[6] : SDMA2 Firmware Version: 10
HCU[6] : SMC Firmware Version: 00.00.32.01
HCU[7] : MEC Firmware Version: 42
HCU[7] : MEC2 Firmware Version: 42
HCU[7] : RLC Firmware Version: 1
HCU[7] : SDMA Firmware Version: 10
HCU[7] : SDMA2 Firmware Version: 10
HCU[7] : SMC Firmware Version: 00.00.32.01
======================================================================================
======================================================================================
HCU[0] : Card Series: BW200
HCU[0] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[1] : Card Series: BW200
HCU[1] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[2] : Card Series: BW200
HCU[2] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[3] : Card Series: BW200
HCU[3] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[4] : Card Series: BW200
HCU[4] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[5] : Card Series: BW200
HCU[5] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[6] : Card Series: BW200
HCU[6] : Card Vendor: C-3000 IC Design Co., Ltd.
HCU[7] : Card Series: BW200
HCU[7] : Card Vendor: C-3000 IC Design Co., Ltd.
======================================================================================
======================================================================================
HCU[0] : No Bad Page! (Type: all)
HCU[1] : No Bad Page! (Type: all)
HCU[2] : No Bad Page! (Type: all)
HCU[3] : No Bad Page! (Type: all)
HCU[4] : No Bad Page! (Type: all)
HCU[5] : No Bad Page! (Type: all)
HCU[6] : No Bad Page! (Type: all)
HCU[7] : No Bad Page! (Type: all)
======================================================================================
=================================== End of SMI Log ===================================
utils/pid.go
View file @
b632c3a7
...
@@ -60,3 +60,12 @@ func GetProcessByName(cmdline string) ([]*process.Process, error) {
...
@@ -60,3 +60,12 @@ func GetProcessByName(cmdline string) ([]*process.Process, error) {
}
}
return
result
,
nil
return
result
,
nil
}
}
// GetProcessCPUUsage 获取进程的CPU使用率
func
GetProcessCPUUsage
(
pid
int32
)
(
float64
,
error
)
{
p
,
err
:=
process
.
NewProcess
(
pid
)
if
err
!=
nil
{
return
0
,
err
}
return
p
.
CPUPercent
()
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment