Commit 5f7b0309 authored by songlinfeng
Browse files

add docker query

parent 3f9e28c9
package docker
import (
"dcu-container-toolkit/internal/logger"
"dcu-container-toolkit/internal/query"
"github.com/urfave/cli/v2"
)
// command implements the "docker" sub-command. It holds the logger that was
// passed to NewCommand.
type command struct {
// logger is stored at construction time; none of the methods visible in this
// file read it yet.
logger logger.Interface
}
// config collects the values bound to the "docker" sub-command's flags.
type config struct {
// dcus is the raw value of the --dcus/-d flag (default "all"); it is handed
// unchanged to query.ShowStatus.
dcus string
}
// NewCommand returns the cli definition of the "docker" sub-command, built
// around a command value that keeps the supplied logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}
// build assembles the cli.Command for the "docker" sub-command.
//
// The returned command owns a private config value: the --dcus/-d flag writes
// into it through Destination, and the Action closure passes it to run once
// flag parsing has finished.
func (m command) build() *cli.Command {
	cfg := config{}

	docker := cli.Command{
		Name:  "docker",
		Usage: "List which dockers are using the DCUs",
		Action: func(context *cli.Context) error {
			return m.run(context, &cfg)
		},
	}

	docker.Flags = []cli.Flag{
		&cli.StringFlag{
			Name:    "dcus",
			Aliases: []string{"d"},
			Value:   "all",
			// The flag carries a DCU selection string, not an on/off switch,
			// so the help text describes the accepted forms of that string.
			Usage:       "DCUs to query: \"all\", or a comma-separated list of indices, index ranges (e.g. 0-3) and UUIDs",
			Destination: &cfg.dcus,
		},
	}

	return &docker
}
// run is the Action body of the "docker" sub-command: it forwards the parsed
// --dcus value to query.ShowStatus and returns whatever error that reports.
// The cli context c is accepted for the Action signature but not consulted.
func (m command) run(c *cli.Context, cfg *config) error {
	return query.ShowStatus(cfg.dcus)
}
......@@ -8,6 +8,7 @@ import (
"dcu-container-toolkit/cmd/dcu-ctk/cdi"
"dcu-container-toolkit/cmd/dcu-ctk/config"
"dcu-container-toolkit/cmd/dcu-ctk/dcu-tracker"
"dcu-container-toolkit/cmd/dcu-ctk/docker"
"dcu-container-toolkit/cmd/dcu-ctk/hook"
"dcu-container-toolkit/cmd/dcu-ctk/runtime"
"dcu-container-toolkit/cmd/dcu-ctk/rootless"
......@@ -77,6 +78,7 @@ func main() {
dcuTracker.NewCommand(logger),
runtime.NewCommand(logger),
config.NewCommand(logger),
docker.NewCommand(logger),
hook.NewCommand(logger),
cdi.NewCommand(logger),
}
......
package query
import (
"bytes"
"dcu-container-toolkit/internal/hydcu"
"fmt"
"os/exec"
"regexp"
"sort"
"strconv"
"strings"
)
// DcuInfo is one row of the report built by ShowStatus: a DCU together with
// the processes and containers found on it.
type DcuInfo struct {
// DcuId is the device index; ShowStatus also uses it as the map key.
DcuId int
// Pid lists the PIDs (as reported by hy-smi) of processes on this DCU.
Pid []string
// ContainerName lists the names returned by QueryName for those PIDs.
ContainerName []string
// Uuid is the device unique ID, stored by ShowStatus as "0x" followed by
// upper-case hex digits.
Uuid string
}
// DCUProcess is one PID/device pair extracted by Exec from the output of
// "hy-smi --showpids".
type DCUProcess struct {
// Pid is the decimal process ID captured after "PID:".
Pid string
// Index is the text captured after "HCU Index:" / "DCU Index:" with
// surrounding brackets, quotes and spaces trimmed.
Index string
}
// parseDCUsList resolves a user-supplied DCU selection string into device
// indices.
//
// dcus is either "all" (compared case-insensitively), which selects every
// device reported by hydcu.GetHYDCUs, or a comma-separated list whose entries
// may each be:
//   - a unique ID: anything starting with "0x"/"0X", or a bare hex string
//     longer than 8 characters; it is looked up in the map returned by
//     hydcu.GetUniqueIdToDeviceIndexMap, first with and then without a "0x"
//     prefix;
//   - an inclusive index range "start-end";
//   - a single decimal index.
//
// Whitespace around the whole string and around each entry is ignored, so
// "0, 1" is accepted.
//
// It returns, in order: the selected indices sorted ascending, the entries
// that are invalid (unknown IDs, out-of-range or unparsable indices), the
// malformed ranges, and an error that is non-nil only when the device list
// cannot be read. A failure to load the unique-ID map is reported as a warning
// on stdout and makes every unique-ID entry invalid.
func parseDCUsList(dcus string) ([]int, []string, []string, error) {
	// isHexString reports whether s is non-empty and consists only of
	// hexadecimal digits.
	isHexString := func(s string) bool {
		if len(s) == 0 {
			return false
		}
		for _, c := range s {
			if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
				return false
			}
		}
		return true
	}

	validDCUs := []int{}
	invalidDCUs := []string{}
	invalidDCUsRange := []string{}

	dcusInfo, err := hydcu.GetHYDCUs()
	if err != nil {
		return []int{}, []string{}, []string{}, fmt.Errorf("Failed to get DCU info, Error: %w", err)
	}
	dcuCount := len(dcusInfo)

	dcus = strings.TrimSpace(dcus)
	if strings.EqualFold(dcus, "all") {
		for i := 0; i < dcuCount; i++ {
			validDCUs = append(validDCUs, i)
		}
		return validDCUs, []string{}, []string{}, nil
	}

	uuidToDCUIdMap, err := hydcu.GetUniqueIdToDeviceIndexMap()
	if err != nil {
		// Best effort: index and range entries can still be resolved without
		// the map, so only warn and continue with an empty one.
		fmt.Printf("Failed to get UUID to DCU Id mappings: %v\n", err)
		uuidToDCUIdMap = make(map[string][]int)
	}

	for _, entry := range strings.Split(dcus, ",") {
		entry = strings.TrimSpace(entry)

		switch {
		case strings.HasPrefix(entry, "0x") || strings.HasPrefix(entry, "0X") ||
			(len(entry) > 8 && isHexString(entry)):
			// Unique ID: try the lower-cased, "0x"-prefixed form first, then
			// the bare hex form.
			uuid := strings.ToLower(entry)
			if !strings.HasPrefix(uuid, "0x") {
				uuid = "0x" + uuid
			}
			dcuIds, exists := uuidToDCUIdMap[uuid]
			if !exists {
				dcuIds, exists = uuidToDCUIdMap[strings.TrimPrefix(uuid, "0x")]
			}
			if exists {
				validDCUs = append(validDCUs, dcuIds...)
			} else {
				invalidDCUs = append(invalidDCUs, entry)
			}

		case strings.Contains(entry, "-"):
			bounds := strings.SplitN(entry, "-", 2)
			start, errStart := strconv.Atoi(bounds[0])
			end, errEnd := strconv.Atoi(bounds[1])
			if errStart != nil || errEnd != nil || start < 0 || end < 0 || start > end {
				invalidDCUsRange = append(invalidDCUsRange, entry)
				continue
			}
			// NOTE(review): every index of the range beyond the last device
			// yields its own invalid entry, so a very large upper bound makes
			// this loop (and the returned slice) correspondingly large.
			for i := start; i <= end; i++ {
				if i < dcuCount {
					validDCUs = append(validDCUs, i)
				} else {
					invalidDCUs = append(invalidDCUs, strconv.Itoa(i))
				}
			}

		default:
			i, convErr := strconv.Atoi(entry)
			if convErr == nil && i >= 0 && i < dcuCount {
				validDCUs = append(validDCUs, i)
			} else {
				invalidDCUs = append(invalidDCUs, entry)
			}
		}
	}

	sort.Ints(validDCUs)
	return validDCUs, invalidDCUs, invalidDCUsRange, nil
}
// CheckHySmi locates the hy-smi executable by running "whereis hy-smi" and
// returning the first path listed after the leading "hy-smi:" label.
//
// It returns an error when whereis cannot be run or when whereis lists no path
// at all, so a nil error always comes with a non-empty path.
func CheckHySmi() (string, error) {
	var out bytes.Buffer
	cmd := exec.Command("whereis", "hy-smi")
	cmd.Stdout = &out
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("locating hy-smi with whereis: %w", err)
	}

	// whereis prints "hy-smi: <path> <path> ..."; with nothing found only the
	// label remains, which must not be passed on as a usable path.
	fields := strings.Fields(out.String())
	if len(fields) < 2 {
		return "", fmt.Errorf("hy-smi not found: whereis reported no path (output %q)", strings.TrimSpace(out.String()))
	}
	return fields[1], nil
}
// Exec runs "<hysmi> --showpids" and extracts one DCUProcess per
// "PID: <digits> ... HCU Index: <text>" (or "DCU Index:") block found in its
// standard output.
//
// hysmi is the path of the hy-smi executable; an empty path is rejected up
// front. The index text is taken up to the end of its line and has surrounding
// brackets, single quotes and spaces removed; anything between those (for
// example several comma-separated indices) is kept verbatim.
//
// It returns a nil slice and an error when the command cannot be run.
func Exec(hysmi string) ([]DCUProcess, error) {
	var results []DCUProcess
	if hysmi == "" {
		return results, fmt.Errorf("Failed to run hy-smi command, Error: empty hy-smi path")
	}

	var out bytes.Buffer
	cmd := exec.Command(hysmi, "--showpids")
	cmd.Stdout = &out
	if err := cmd.Run(); err != nil {
		return results, fmt.Errorf("Failed to run hy-smi command, Error: %w", err)
	}

	// [HD] accepts "HCU Index" and "DCU Index". Inside a character class '|'
	// is a literal, so it must not be used as an alternation separator there.
	reBlock := regexp.MustCompile(`PID:\s*(\d+)[\s\S]*?[HD]CU Index:\s*(.*)`)
	for _, m := range reBlock.FindAllStringSubmatch(out.String(), -1) {
		index := strings.Trim(strings.TrimSpace(m[2]), "[]' ")
		results = append(results, DCUProcess{Pid: m[1], Index: index})
	}
	return results, nil
}
// QueryName returns the name of the Docker container that the process pid
// runs in.
//
// pid must be a decimal process ID. The 64-character container ID is taken
// from /proc/<pid>/cgroup, accepting both the cgroupfs layout
// ("docker/<id>") and the systemd layout ("docker-<id>.scope"). The ID is then
// matched against the (possibly shortened) IDs printed by "docker ps -a".
//
// It returns "" with a nil error when the process is not in a Docker cgroup or
// no listed container matches, and a non-nil error when pid is malformed or
// one of the external commands fails.
func QueryName(pid string) (string, error) {
	// pid is spliced into a file path below, so accept digits only.
	if pid == "" {
		return "", fmt.Errorf("invalid pid %q: must be a decimal number", pid)
	}
	for _, r := range pid {
		if r < '0' || r > '9' {
			return "", fmt.Errorf("invalid pid %q: must be a decimal number", pid)
		}
	}

	var cgroupOut bytes.Buffer
	catCmd := exec.Command("cat", "/proc/"+pid+"/cgroup")
	catCmd.Stdout = &cgroupOut
	if err := catCmd.Run(); err != nil {
		return "", fmt.Errorf("reading cgroup of pid %s: %w", pid, err)
	}

	re := regexp.MustCompile(`docker[/-]([0-9a-f]{64})`)
	matches := re.FindStringSubmatch(cgroupOut.String())
	if len(matches) < 2 {
		return "", nil
	}
	containerID := matches[1]

	var psOut bytes.Buffer
	psCmd := exec.Command("docker", "ps", "-a", "--format", "{{.ID}} {{.Names}}")
	psCmd.Stdout = &psOut
	if err := psCmd.Run(); err != nil {
		return "", fmt.Errorf("listing docker containers: %w", err)
	}

	for _, line := range strings.Split(psOut.String(), "\n") {
		fields := strings.Fields(line)
		// docker ps prints a shortened ID, hence the prefix comparison
		// against the full ID read from the cgroup file.
		if len(fields) >= 2 && strings.HasPrefix(containerID, fields[0]) {
			return fields[1], nil
		}
	}
	return "", nil
}
// ShowStatus prints a table of the Docker containers that have processes on
// the selected DCUs.
//
// dcus is the selection string understood by parseDCUsList ("all", indices,
// ranges, unique IDs); entries that parseDCUsList reports as invalid are
// ignored. Processes come from "hy-smi --showpids" (see Exec) and container
// names from QueryName. Rows are printed in ascending DCU index order; a DCU
// with no recorded container name produces no row.
//
// It returns an error when hy-smi cannot be located or run, when the selection
// cannot be parsed, or when the unique-ID map cannot be loaded.
func ShowStatus(dcus string) error {
	hySmiPath, err := CheckHySmi()
	if err != nil {
		return fmt.Errorf("Failed to check hy-smi path, Error: %w", err)
	}
	if hySmiPath == "" {
		return fmt.Errorf("Failed to check hy-smi path, Error: hy-smi not found")
	}

	processes, err := Exec(hySmiPath)
	if err != nil {
		// Exec already prefixes its error with "Failed to run hy-smi command".
		return err
	}

	validDCUs, _, _, err := parseDCUsList(dcus)
	if err != nil {
		return fmt.Errorf("Failed to parse DCUs list, Error: %w", err)
	}

	uuidToDCUIdMap, err := hydcu.GetUniqueIdToDeviceIndexMap()
	if err != nil {
		return fmt.Errorf("Failed to get UUID to DCU Id mappings: %w", err)
	}

	// Index the unique IDs by the first device index they map to, normalised
	// to "0x" + upper-case hex. Should two IDs share a first index, the
	// lexically smaller one is kept so the result does not depend on map
	// iteration order.
	uuidByFirstID := make(map[int]string, len(uuidToDCUIdMap))
	for rawUUID, dcuIds := range uuidToDCUIdMap {
		if len(dcuIds) == 0 {
			continue
		}
		uuid := rawUUID
		if strings.HasPrefix(uuid, "0x") || strings.HasPrefix(uuid, "0X") {
			uuid = uuid[2:]
		}
		uuid = "0x" + strings.ToUpper(uuid)
		if prev, seen := uuidByFirstID[dcuIds[0]]; !seen || uuid < prev {
			uuidByFirstID[dcuIds[0]] = uuid
		}
	}

	dcuinfos := make(map[int]DcuInfo, len(validDCUs))
	for _, dcu := range validDCUs {
		if uuid, ok := uuidByFirstID[dcu]; ok {
			dcuinfos[dcu] = DcuInfo{DcuId: dcu, Uuid: uuid}
		}
	}

	for _, process := range processes {
		index, err := strconv.Atoi(process.Index)
		if err != nil {
			continue
		}
		dcu, exists := dcuinfos[index]
		if !exists {
			continue
		}
		dcu.Pid = append(dcu.Pid, process.Pid)
		// A failed name lookup only drops the name; the PID stays recorded.
		if name, err := QueryName(process.Pid); err == nil {
			dcu.ContainerName = append(dcu.ContainerName, name)
		}
		// DcuInfo is a value type, so the modified copy must be stored back.
		dcuinfos[index] = dcu
	}

	// Map iteration order is random; sort the indices for a stable table.
	ids := make([]int, 0, len(dcuinfos))
	for id := range dcuinfos {
		ids = append(ids, id)
	}
	sort.Ints(ids)

	fmt.Println(strings.Repeat("-", 120))
	fmt.Printf("%-40s%-50s%-20s\n", "GPU Id", "UUID", "Container Names")
	fmt.Println(strings.Repeat("-", 120))
	for _, id := range ids {
		info := dcuinfos[id]
		for idx, name := range info.ContainerName {
			if idx == 0 {
				fmt.Printf("%-40v%-50s%-20v\n", id, info.Uuid, name)
			} else {
				fmt.Printf("%-40v%-50s%-20v\n", "", "", name)
			}
		}
	}
	fmt.Println(strings.Repeat("-", 120))
	return nil
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment