// Package backend collects DCU, process and Docker data for the tui package.
package backend

import (
	"fmt"
	"get-container/docker"
	"get-container/gpu"
	"get-container/utils"
	"maps"
	"math"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/shirou/gopsutil/v4/process"
)

var (
	// MapIdDCU maps a DCU id (int) to its latest *DCUInfo snapshot.
	// Stored values are replaced, never modified in place, so a loaded
	// pointer can be read without further locking.
	MapIdDCU = sync.Map{}
	// DockerPidInfo holds the PID map reported by
	// docker.ContainerInfo.GetProcessIdInDocker. It is always non-nil after init.
	DockerPidInfo *DockerProcessMap = nil
	// User is the current user's name, or the numeric uid when the lookup fails.
	User = ""
	// HostName is the value returned by os.Hostname (empty when that call fails).
	HostName = ""
)

// init resolves the host identity and takes the initial Docker PID snapshot.
func init() {
	// Identity is resolved first so that it is populated even when the
	// Docker query below fails.
	// A hostname lookup failure is tolerated: HostName simply stays empty.
	HostName, _ = os.Hostname()
	uid := os.Getuid()
	if u, err := utils.GetSysUserById(uid); err == nil && u != nil {
		User = u.Name
	} else {
		User = strconv.Itoa(uid)
	}

	pids, err := docker.ContainerInfo.GetProcessIdInDocker(false)
	if err != nil || pids == nil {
		DockerPidInfo = &DockerProcessMap{pids: make(map[int32]bool)}
		return
	}
	DockerPidInfo = &DockerProcessMap{pids: maps.Clone(pids)}
}

// DCUInfo is the per-device record exposed to the UI.
//
// Fields tagged "full" are only refreshed by UpdateDCUInfo(true); every other
// field is refreshed on each update.
type DCUInfo struct {
	Id               int
	Name             string // full
	PerformanceLevel string // full
	Fan              string // full
	Temp             float32
	PwrAvg           float32
	PwrCap           float32 // full
	BusId            string  // full
	MemTotal         int
	MemUsed          int
	MemUsedPerent    float32
	Mig              bool // full
	DCUUTil          float32
	Ecc              bool // full
	PwrMode          string
}

// UpdateDCUInfo refreshes MapIdDCU from the gpu package.
//
// When full is true every field is refreshed (the SMI summary and ECC state
// are queried in addition to the regular sources). When full is false, devices
// already present in MapIdDCU keep their previous "full" fields and only the
// remaining fields are replaced. Devices seen for the first time are stored as
// collected in either mode.
func UpdateDCUInfo(full bool) {
	var (
		smiAll           map[int]*gpu.SMIAllOutput
		eccInfo, migInfo map[int]bool
		memInfo          map[int]gpu.DCUMemInfo
		runInfo          map[int]gpu.DCURunningInfo

		errSmiAll, errEcc, errMem, errRun error
	)

	// Each source is queried in its own goroutine; wg.Wait below orders the
	// writes to the variables above before they are read.
	var wg sync.WaitGroup
	collect := func(fn func()) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			fn()
		}()
	}
	if full {
		collect(func() { smiAll, errSmiAll = gpu.GetSMIAllOutput() })
		collect(func() { eccInfo, errEcc = gpu.GetEccInfo() })
	}
	collect(func() { migInfo = gpu.GetMigInfo() })
	collect(func() { memInfo, errMem = gpu.GetDCUMemInfo() })
	collect(func() { runInfo, errRun = gpu.GetRunningInfo() })
	wg.Wait()

	cache := make(map[int]DCUInfo)
	// entry returns the record accumulated so far for id. A missing key
	// yields the zero value, so only Id has to be filled in.
	entry := func(id int) DCUInfo {
		info := cache[id]
		info.Id = id
		return info
	}

	if errSmiAll == nil {
		for id, smi := range smiAll {
			if smi == nil {
				continue
			}
			info := entry(id)
			info.Name = smi.CardSeries
			info.PerformanceLevel = smi.PerLevel
			info.Fan = gpu.NA
			info.PwrCap = smi.MaxPwr
			info.PwrAvg = smi.AvgPwr
			info.BusId = smi.PCIBus
			info.MemUsedPerent = smi.HCUMemUsage
			info.DCUUTil = smi.HCUUsage
			cache[id] = info
		}
	}
	if errEcc == nil {
		for id, ecc := range eccInfo {
			info := entry(id)
			info.Ecc = ecc
			cache[id] = info
		}
	}
	for id, mig := range migInfo {
		info := entry(id)
		info.Mig = mig
		cache[id] = info
	}
	if errMem == nil {
		for id, mem := range memInfo {
			info := entry(id)
			info.MemTotal = int(mem.Total.Num)
			info.MemUsed = int(mem.Used.Num)
			cache[id] = info
		}
	}
	// The running info is applied last, so its values win over the SMI
	// summary for the fields both sources provide.
	if errRun == nil {
		for id, run := range runInfo {
			info := entry(id)
			info.Temp = run.Temp
			info.PwrAvg = run.AvgPower
			info.PerformanceLevel = run.PerformanceLevel
			info.MemUsedPerent = run.MemPerc
			info.DCUUTil = run.HCUPerc
			info.PwrMode = run.Mode
			cache[id] = info
		}
	}

	for id, fresh := range cache {
		// Copy into a new variable so that each stored pointer refers to its
		// own value regardless of the loop-variable semantics in effect.
		merged := fresh
		if !full {
			if old, ok := MapIdDCU.Load(id); ok {
				if prev, ok := old.(*DCUInfo); ok && prev != nil {
					// Partial update: keep the fields that only a full
					// update is allowed to change.
					merged.Name = prev.Name
					merged.PerformanceLevel = prev.PerformanceLevel
					merged.Fan = prev.Fan
					merged.PwrCap = prev.PwrCap
					merged.BusId = prev.BusId
					merged.Mig = prev.Mig
					merged.Ecc = prev.Ecc
				}
			}
		}
		// Store a new snapshot instead of mutating the previous one, so that
		// concurrent readers never observe a half-written record.
		MapIdDCU.Store(id, &merged)
	}
}

// GetDCUInfo returns a copy of every record in MapIdDCU, keyed by DCU id.
// Entries whose key or value has an unexpected type are skipped.
func GetDCUInfo() map[int]DCUInfo {
	result := make(map[int]DCUInfo)
	MapIdDCU.Range(func(key, value any) bool {
		id, okID := key.(int)
		info, okInfo := value.(*DCUInfo)
		if okID && okInfo && info != nil {
			result[id] = *info
		}
		return true
	})
	return result
}

// DockerProcessMap is a lock-protected copy of the PID map reported by the
// docker package.
type DockerProcessMap struct {
	lock sync.RWMutex
	pids map[int32]bool
}

// GetPidInfo returns a copy of the currently cached PID map.
func (dpm *DockerProcessMap) GetPidInfo() map[int32]bool {
	dpm.lock.RLock()
	defer dpm.lock.RUnlock()
	return maps.Clone(dpm.pids)
}

// Update re-queries the docker package, replaces the cached PID map and
// returns a copy of the new contents. This is a time-consuming operation.
// When the query fails or yields nil, the cache is reset to an empty map.
func (dpm *DockerProcessMap) Update() map[int32]bool {
	// Run the slow query before taking the lock so readers are not blocked
	// while it is in progress.
	pids, err := docker.ContainerInfo.GetProcessIdInDocker(true)

	var fresh map[int32]bool
	if err != nil || pids == nil {
		fresh = make(map[int32]bool)
	} else {
		fresh = maps.Clone(pids)
	}

	dpm.lock.Lock()
	dpm.pids = fresh
	dpm.lock.Unlock()

	// Hand the caller its own copy so that it cannot alias the cached map.
	return maps.Clone(fresh)
}

// DCUProcessInfo describes one process running on one DCU.
type DCUProcessInfo struct {
	DCU    int         // index of the DCU in use
	DCUMem string      // DCU memory used by the process, human-readable
	SDMA   int         // SDMA usage as reported by the gpu package
	Info   ProcessInfo // generic process information
}

// ProcessInfo is the device-independent information about a process.
type ProcessInfo struct {
	Pid      int32   // process id
	User     string  // user name or uid
	CPU      float64 // CPU usage
	Mem      float32 // memory usage
	Time     string  // consumed CPU time, formatted by durationStr
	Cmd      string  // command line
	InDocker bool    // whether the process runs inside a Docker container
}

// getProcessInfo gathers ProcessInfo for each pid. Pids for which
// process.NewProcess fails are omitted from the result.
func getProcessInfo(pids []int32) map[int32]ProcessInfo {
	result := make(map[int32]ProcessInfo, len(pids))
	if len(pids) == 0 {
		return result
	}
	dockerPids := DockerPidInfo.GetPidInfo()
	for _, pid := range pids {
		p, err := process.NewProcess(pid)
		if err != nil {
			continue
		}
		item := ProcessInfo{Pid: p.Pid}
		// The attributes below are best effort: when a query fails the field
		// keeps the value returned alongside the error.
		item.User, _ = p.Username()
		item.CPU, _ = p.CPUPercent()
		item.Mem, _ = p.MemoryPercent()
		if t, err := p.Times(); err == nil && t != nil {
			// The sum is truncated to whole seconds before formatting.
			item.Time = durationStr(time.Duration(t.System+t.User) * time.Second)
		}
		item.Cmd, _ = p.Cmdline()
		// A missing key yields false, the same as an explicit false entry.
		item.InDocker = dockerPids[item.Pid]
		result[p.Pid] = item
	}
	return result
}

// GetDCUProcessInfo returns the processes using each DCU, keyed by DCU index.
// An empty map is returned when the gpu package reports an error.
func GetDCUProcessInfo() map[int][]DCUProcessInfo {
	result := make(map[int][]DCUProcessInfo)
	info, err := gpu.GetDCUPidInfo()
	if err != nil {
		return result
	}

	pids := make([]int32, 0, len(info))
	for _, v := range info {
		pids = append(pids, v.Pid)
	}
	pinfo := getProcessInfo(pids)

	for _, v := range info {
		for _, raw := range v.HCUIndex {
			// Surrounding whitespace is tolerated; entries that still do not
			// parse as an integer are skipped.
			idx, err := strconv.Atoi(strings.TrimSpace(raw))
			if err != nil {
				continue
			}
			// Appending to the nil slice of a missing key creates the entry.
			result[idx] = append(result[idx], DCUProcessInfo{
				DCU:    idx,
				DCUMem: v.VRamUsed.HumanReadStr(1),
				SDMA:   v.SDMAUsed,
				Info:   pinfo[v.Pid],
			})
		}
	}
	return result
}

// durationStr formats d as "H:MM:SS": hours are unpadded, minutes and seconds
// are zero-padded to two digits.
func durationStr(d time.Duration) string {
	h := int(math.Floor(d.Hours()))
	m := int(d.Minutes()) % 60
	s := int(math.Floor(d.Seconds())) % 60
	return fmt.Sprintf("%d:%02d:%02d", h, m, s)
}