Commit 3be6c25c authored by songlinfeng's avatar songlinfeng
Browse files

support dtk-ctk dcu-tracker

parent 6b8eb612
......@@ -5,14 +5,27 @@
package main
import (
"dtk-container-toolkit/internal/dcu-tracker"
"dtk-container-toolkit/internal/runtime"
"os"
//"log"
)
func main() {
//f, _ := os.OpenFile("/var/log/app.log", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
//log.SetOutput(f)
r := runtime.New()
//log.Println("Log written to file")
err := r.Run(os.Args)
if err != nil {
if err != nil {
// log.Println("failed to run runtime")
dcuTracker, err := dcuTracker.New()
if err != nil {
// log.Println("failed to new dcutracker")
os.Exit(1)
}
dcuTracker.ReleaseDCUs(os.Args[len(os.Args)-1])
// log.Println("releasedcu")
os.Exit(1)
}
}
package dcuTracker
import (
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker/disable"
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker/enable"
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker/initialize"
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker/release"
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker/reset"
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker/status"
"dtk-container-toolkit/internal/dcu-tracker"
"dtk-container-toolkit/internal/logger"
"fmt"
"os/user"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (m command) build() *cli.Command {
dcuTrackerCmd := cli.Command{
Name: "dcu-tracker",
Usage: "DCU Tracker related commands",
UsageText: `dtk-ctk dcu-tracker [dcu-ids] [accessibility]
Arguments:
dcu-ids Comma-separated list of DCU IDs (comma separated list, range operator, all)
accessibility Must be either 'exclusive' or 'shared'
Examples:
dtk-ctk dcu-tracker 0,1,2 exclusive
dtk-ctk dcu-tracker 0,1-2 shared
dtk-ctk dcu-tracker all shared
OR
dtk-ctk dcu-tracker [command] [options]`,
Before: func(c *cli.Context) error { return m.validateGenOptions(c) },
Action: func(c *cli.Context) error { return m.performAction(c) },
}
dcuTrackerCmd.Subcommands = []*cli.Command{
disable.NewCommand(m.logger),
enable.NewCommand(m.logger),
initialize.NewCommand(m.logger),
reset.NewCommand(m.logger),
release.NewCommand(m.logger),
status.NewCommand(m.logger),
}
return &dcuTrackerCmd
}
func (m command) validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func (m command) performAction(c *cli.Context) error {
if c.Args().Len() == 0 {
return cli.ShowAppHelp(c)
}
if c.Args().Len() > 2 {
return cli.Exit("Error: Missing arguments. Usgae: dcu-tracker <dcu_id> <operation>", 1)
}
gpuIDs := c.Args().Get(0)
operation := c.Args().Get(1)
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create DCU tracker, Error: %v", err)
}
switch operation {
case "exclusive":
dcuTracker.MakeDCUsExclusive(gpuIDs)
case "shared":
dcuTracker.MakeDCUsShared(gpuIDs)
default:
return cli.Exit("Error: Invalid operation. Must be 'exclusive' or 'shared", 1)
}
return nil
}
package disable
import (
dcuTracker "dtk-container-toolkit/internal/dcu-tracker"
"fmt"
"os/user"
"dtk-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (m command) build() *cli.Command {
dcuTrackerDisableCmd := cli.Command{
Name: "disable",
Usage: "Disable the DCU Tracker",
UsageText: "dtk-ctk dcu-tracker disable [options]",
Before: func(c *cli.Context) error { return validateGenOptions(c) },
Action: func(c *cli.Context) error { return performAction(c) },
}
return &dcuTrackerDisableCmd
}
func validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func performAction(c *cli.Context) error {
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create GPU tracker, Error: %v", err)
}
err = dcuTracker.Disable()
if err != nil {
return fmt.Errorf("Failed to Reset GPU Tracker, Error: %v", err)
}
return nil
}
package enable
import (
dcuTracker "dtk-container-toolkit/internal/dcu-tracker"
"fmt"
"os/user"
"dtk-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (m command) build() *cli.Command {
dcuTrackerEnableCmd := cli.Command{
Name: "enable",
Usage: "Enable the DCU Tracker",
UsageText: "dtk-ctk dcu-tracker enable [options]",
Before: func(c *cli.Context) error { return validateGenOptions(c) },
Action: func(c *cli.Context) error { return performAction(c) },
}
return &dcuTrackerEnableCmd
}
func validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func performAction(c *cli.Context) error {
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create DCU tracker, Error: %v", err)
}
err = dcuTracker.Enable()
if err != nil {
return fmt.Errorf("Failed to Reset DCU Tracker, Error: %v", err)
}
return nil
}
package initialize
import (
dcuTracker "dtk-container-toolkit/internal/dcu-tracker"
"fmt"
"os/user"
"dtk-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (m command) build() *cli.Command {
// dcuTrackerInitCmd initializes the DCU Tracker.
dcuTrackerInitCmd := cli.Command{
Name: "init",
Hidden: true,
Usage: "Initialize the DCU Tracker",
UsageText: "dtk-ctk dcu-tracker init [options]",
Before: func(c *cli.Context) error { return validateGenOptions(c) },
Action: func(c *cli.Context) error { return performAction(c) },
}
return &dcuTrackerInitCmd
}
func validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func performAction(c *cli.Context) error {
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create GPU tracker, Error: %v", err)
}
err = dcuTracker.Init()
if err != nil {
return fmt.Errorf("Failed to Initialize GPU Tracker, Error: %v", err)
}
return nil
}
package release
import (
dcuTracker "dtk-container-toolkit/internal/dcu-tracker"
"fmt"
"os/user"
"dtk-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (m command) build() *cli.Command {
dcuTrackerReleaseCmd := cli.Command{
Name: "release",
Hidden: true,
Usage: "Release DCUs used by a container",
UsageText: `dtk-ctk dcu-tracker release [container_id]
Arguments:
container_id container ID of the container
Examples:
dtk-ctk dcu-tracker release a4e19862b4e2a1b04a1f793f346d0411f4a0a3857578c526a25ac6c858168fd8`,
Before: func(c *cli.Context) error {
return validateGenOptions(c)
},
Action: func(c *cli.Context) error {
return performAction(c)
},
}
return &dcuTrackerReleaseCmd
}
func validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func performAction(c *cli.Context) error {
if c.Args().Len() == 0 {
return cli.ShowAppHelp(c)
}
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create GPU tracker, Error: %v", err)
}
err = dcuTracker.ReleaseDCUs(c.Args().Get(0))
if err != nil {
return fmt.Errorf("Failed to release GPUs, Error: %v", err)
}
return nil
}
package reset
import (
dcuTracker "dtk-container-toolkit/internal/dcu-tracker"
"fmt"
"os/user"
"dtk-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (m command) build() *cli.Command {
dcuTrackerResetCmd := cli.Command{
Name: "reset",
Usage: "Reset the DCU Tracker",
UsageText: "dtk-ctk dcu-tracker reset [options]",
Before: func(c *cli.Context) error {
return validateGenOptions(c)
},
Action: func(c *cli.Context) error {
return performAction(c)
},
}
return &dcuTrackerResetCmd
}
func validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func performAction(c *cli.Context) error {
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create DCU tracker, Error: %v", err)
}
err = dcuTracker.Reset()
if err != nil {
return fmt.Errorf("Failed to Reset DCU Tracker, Error: %v", err)
}
return nil
}
package status
import (
dcuTracker "dtk-container-toolkit/internal/dcu-tracker"
"fmt"
"os/user"
"dtk-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
logger logger.Interface
}
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
func (c *command) build() *cli.Command {
dcuTrackerStatusCmd := cli.Command{
Name: "status",
Usage: "Show Status of DCUs",
UsageText: "dtk-ctk dcu-tracker status [options]",
Before: func(c *cli.Context) error {
return validateGenOptions(c)
},
Action: func(c *cli.Context) error {
return performAction(c)
},
}
return &dcuTrackerStatusCmd
}
func validateGenOptions(c *cli.Context) error {
curUser, err := user.Current()
if err != nil || curUser.Uid != "0" {
return fmt.Errorf("Permission denied: Not running as root")
}
return nil
}
func performAction(c *cli.Context) error {
dcuTracker, err := dcuTracker.New()
if err != nil {
return fmt.Errorf("Failed to create GPU tracker, Error: %v", err)
}
err = dcuTracker.ShowStatus()
if err != nil {
return fmt.Errorf("Failed to show GPUs status, Error: %v", err)
}
return nil
}
......@@ -7,6 +7,7 @@ package main
import (
"dtk-container-toolkit/cmd/dtk-ctk/cdi"
"dtk-container-toolkit/cmd/dtk-ctk/config"
"dtk-container-toolkit/cmd/dtk-ctk/dcu-tracker"
"dtk-container-toolkit/cmd/dtk-ctk/hook"
"dtk-container-toolkit/cmd/dtk-ctk/runtime"
"dtk-container-toolkit/cmd/dtk-ctk/rootless"
......@@ -73,6 +74,7 @@ func main() {
// Define the subcommands
c.Commands = []*cli.Command{
rootless.NewCommand(logger),
dcuTracker.NewCommand(logger),
runtime.NewCommand(logger),
config.NewCommand(logger),
hook.NewCommand(logger),
......
module dtk-container-toolkit
go 1.24
go 1.24.0
toolchain go1.24.6
require (
github.com/gofrs/flock v0.13.0
github.com/opencontainers/runtime-spec v1.2.1
github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3
github.com/urfave/cli/v2 v2.27.5
gopkg.in/ini.v1 v1.67.0
tags.cncf.io/container-device-interface v0.8.1
tags.cncf.io/container-device-interface/specs-go v0.8.0
)
......@@ -14,12 +18,13 @@ require (
require (
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
github.com/fsnotify/fsnotify v1.8.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
golang.org/x/mod v0.23.0 // indirect
golang.org/x/sys v0.30.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
golang.org/x/sys v0.37.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
......@@ -2,11 +2,14 @@ github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw=
github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
......@@ -14,6 +17,10 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
......@@ -27,6 +34,8 @@ github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3v
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
......@@ -35,8 +44,9 @@ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
......@@ -55,10 +65,11 @@ golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
......
......@@ -17,8 +17,9 @@ import (
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type DTK struct {
env map[string]string
mounts []specs.Mount
env map[string]string
mounts []specs.Mount
ContainerId string
}
// NewDTKImageFromSpec creates a DTK image from the input OCI runtime spec.
......
This diff is collapsed.
......@@ -5,6 +5,7 @@
package discover
import (
"dtk-container-toolkit/internal/config/image"
"dtk-container-toolkit/internal/info/drm"
"dtk-container-toolkit/internal/logger"
"dtk-container-toolkit/internal/lookup"
......@@ -81,7 +82,7 @@ func (s selectDeviceByPath) HookIsSelected(Hook) bool {
}
// NewCommonHCUDiscoverer creates a discoverer for the mounts required by HCU.
func NewCommonHCUDiscoverer(logger logger.Interface, dtkCDIHookPath string, driver *root.Driver, isMount bool) (Discover, error) {
func NewCommonHCUDiscoverer(logger logger.Interface, dtkCDIHookPath string, driver *root.Driver, isMount bool, containerImage image.DTK) (Discover, error) {
metaDevices := NewCharDeviceDiscoverer(
logger,
driver.Root,
......@@ -114,11 +115,18 @@ func NewCommonHCUDiscoverer(logger logger.Interface, dtkCDIHookPath string, driv
linkHook = CreateSymlinkHook(dtkCDIHookPath, []string{"/usr/local/hyhal::/opt/hyhal"})
}
var trackHook Hook
value := containerImage.Getenv(image.EnvVarDTKVisibleDevices)
if len(value) > 0 {
trackHook = CreateTrackHook(dtkCDIHookPath, containerImage.ContainerId)
}
d := Merge(
metaDevices,
libraries,
NewUserGroupDiscover(logger),
linkHook,
trackHook,
)
return d, nil
}
......@@ -49,6 +49,23 @@ func CreateSymlinkHook(dtkCDIHookPath string, links []string) Hook {
)
}
func CreateTrackHook(dtkCDIHookPath string, containerId string) Hook {
return CreateDtkTrackHook(
dtkCDIHookPath,
"dcu-tracker",
"release",
containerId,
)
}
func CreateDtkTrackHook(path string, s string, s2 string, id string) Hook {
return Hook{
Lifecycle: cdi.PoststopHook,
Path: path,
Args: []string{"dtk-ctk", s, s2, id},
}
}
// CreateDtkCDIHook creates a hook which invokes the DTK Container CLI hook subcommand.
func CreateDtkCDIHook(dtkCDIHookPath string, hookName string, additionalArgs ...string) Hook {
return cdiHook(dtkCDIHookPath).Create(hookName, additionalArgs...)
......
package hydcu
import (
"bufio"
"fmt"
"io/ioutil"
"math"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
)
type DeviceInfo struct {
DrmDevices []string
PartitionType string
}
type FileSystem interface {
Stat(name string) (os.FileInfo, error)
Glob(pattern string) ([]string, error)
ReadFile(name string) ([]byte, error)
GetDeviceStat(dev string, format string) (string, error)
}
type DefaultFS struct{}
var defaultFS FileSystem = &DefaultFS{}
func (fs *DefaultFS) Stat(name string) (os.FileInfo, error) { return os.Stat(name) }
func (fs *DefaultFS) Glob(pattern string) ([]string, error) { return filepath.Glob(pattern) }
func (fs *DefaultFS) ReadFile(name string) ([]byte, error) { return os.ReadFile(name) }
func (fs *DefaultFS) GetDeviceStat(dev string, format string) (string, error) {
out, err := exec.Command("stat", "-c", format, dev).Output()
if err != nil {
fmt.Println("stat failed for %v: Error %v", dev, err)
return "", err
}
return strings.TrimSpace(string(out)), nil
}
// GetHYDCUs return the list of all the DCU devices on the system.
func GetHYDCUs() ([]DeviceInfo, error) { return GetHyDCUsWithFS(defaultFS) }
func GetHyDCUsWithFS(fs FileSystem) ([]DeviceInfo, error) {
if _, err := fs.Stat("/sys/module/hydcu/drivers/"); err != nil {
return nil, err
}
renderDevIds := GetDevIdsFromTopology(fs)
// Map to store devices by unique_id to maintain grouping
uniqueIdDevices := make(map[string][]DeviceInfo)
var uniqueIds []string // To maintain order
pciDevs, err := fs.Glob("/sys/module/hydcu/drivers/pci:hydcu/[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]:*")
if err != nil {
fmt.Println("Failed to find hydcu driver directories: %v", err)
return nil, err
}
// Process platform devices for partitions
platformDevs, _ := fs.Glob("/sys/devices/platform/hydcu_xcp_*")
// Combine aboth PCI and platform devices
allDevs := append(pciDevs, platformDevs...)
//Process all devices using the same logic
for _, path := range allDevs {
computePartitionFile := filepath.Join(path, "current_compute_partition")
memoryPartitionFile := filepath.Join(path, "current_memory_partition")
computePartitionType, memoryPartitionType, combinedPartitionType := "", "", ""
// Read the compute partition
if data, err := ioutil.ReadFile(computePartitionFile); err == nil {
computePartitionType = strings.ToLower(strings.TrimSpace(string(data)))
}
// Read the memory partition
if data, err := ioutil.ReadFile(memoryPartitionFile); err == nil {
memoryPartitionType = strings.ToLower(strings.TrimSpace(string(data)))
}
combinedPartitionType = computePartitionType + "_" + memoryPartitionType
if combinedPartitionType == "_" {
combinedPartitionType = ""
}
drms, err := fs.Glob(path + "/drm/*")
if err != nil {
return nil, err
}
drmDevs := []string{}
renderMinor := 0
for _, drm := range drms {
dev := filepath.Base(drm)
if len(dev) >= 4 && dev[0:4] == "card" || len(dev) >= 7 && dev[0:7] == "renderD" {
drmDevs = append(drmDevs, "/dev/dri/"+dev)
if len(dev) >= 7 && dev[0:7] == "renderD" {
renderMinor, _ = strconv.Atoi(dev[7:])
}
}
}
if len(drmDevs) > 0 && renderMinor > 0 {
if devID, exists := renderDevIds[renderMinor]; exists {
if _, exists := uniqueIdDevices[devID]; !exists {
uniqueIds = append(uniqueIds, devID)
}
uniqueIdDevices[devID] = append(uniqueIdDevices[devID], DeviceInfo{DrmDevices: drmDevs, PartitionType: combinedPartitionType})
}
}
}
// Sort devices within each unique_id group by render minor number
for _, devID := range uniqueIds {
sort.Slice(uniqueIdDevices[devID], func(i, j int) bool {
getRenderID := func(devInfo DeviceInfo) int {
devs := devInfo.DrmDevices
for _, dev := range devs {
baseDev := filepath.Base(dev)
if len(baseDev) >= 7 && strings.HasPrefix(baseDev, "renderD") {
id, _ := strconv.Atoi(strings.TrimPrefix(baseDev, "renderD"))
return id
}
}
return 0
}
return getRenderID(uniqueIdDevices[devID][i]) < getRenderID(uniqueIdDevices[devID][j])
})
}
// Combine all devices maintaining the unique_id order
var devs []DeviceInfo
for _, devID := range uniqueIds {
devs = append(devs, uniqueIdDevices[devID]...)
}
return devs, nil
}
var topoUniqueIdRe = regexp.MustCompile(`unique_id\s(\d+)`)
var renderMinorRe = regexp.MustCompile(`drm_render_minor\s(\d+)`)
// GetDevIdsFromTopology returns a map of render minor numbers to unique_ids
func GetDevIdsFromTopology(fs FileSystem, topoRootParam ...string) map[int]string {
topoRoot := "/sys/class/kfd/kfd"
if len(topoRootParam) == 1 {
topoRoot = topoRootParam[0]
}
renderDevIds := make(map[int]string)
nodeFiles, err := fs.Glob(topoRoot + "/topology/nodes/*/properties")
if err != nil {
return renderDevIds
}
for _, nodeFile := range nodeFiles {
renderMinor, err := ParseTopologyProperties(fs, nodeFile, renderMinorRe)
if err != nil {
continue
}
if renderMinor <= 0 || renderMinor > math.MaxInt32 {
continue
}
devID, err := ParseTopologyPropertiesString(fs, nodeFile, topoUniqueIdRe)
if err != nil {
continue
}
renderDevIds[int(renderMinor)] = devID
}
return renderDevIds
}
// ParseTopologyProperties parses for a property value in kfd topology file as int64
// The format is usually one entry per line <name> <value>.
func ParseTopologyProperties(fs FileSystem, path string, re *regexp.Regexp) (int64, error) {
content, err := fs.ReadFile(path)
if err != nil {
return 0, err
}
scanner := bufio.NewScanner(strings.NewReader(string(content)))
for scanner.Scan() {
matches := re.FindStringSubmatch(scanner.Text())
if matches != nil {
return strconv.ParseInt(matches[1], 0, 64)
}
}
return 0, fmt.Errorf("property not found in %s", path)
}
// ParseTopologyPropertiesString parses for a property value in kfd topology file as string
// The format is usually one entry per line <name> <value>.
func ParseTopologyPropertiesString(fs FileSystem, path string, re *regexp.Regexp) (string, error) {
content, err := fs.ReadFile(path)
if err != nil {
return "", err
}
scanner := bufio.NewScanner(strings.NewReader(string(content)))
for scanner.Scan() {
matches := re.FindStringSubmatch(scanner.Text())
if matches != nil {
return matches[1], nil
}
}
return "", fmt.Errorf("property not found in %s", path)
}
// GetUniqueIdToDeviceIndexMap returns a map of unique_id (as hex string) to device indices
func GetUniqueIdToDeviceIndexMap() (map[string][]int, error) {
return GetUniqueIdToDeviceIndexMapWithFS(defaultFS)
}
// GetUniqueIdToDeviceIndexMapWithFS creates a mapping from unique_id (hex format) to device indices
func GetUniqueIdToDeviceIndexMapWithFS(fs FileSystem) (map[string][]int, error) {
devs, err := GetHyDCUsWithFS(fs)
if err != nil {
return nil, err
}
renderDevIds := GetDevIdsFromTopology(fs)
uniqueIdToIndex := make(map[string][]int)
// Process each device group and assign index
for deviceIndex, deviceGroup := range devs {
// Find the render minor for this device group
for _, device := range deviceGroup.DrmDevices {
// Extract render minor from device path like /dev/dri/renderD128
if strings.Contains(device, "renderD") {
renderMinorStr := strings.TrimPrefix(filepath.Base(device), "renderD")
if renderMinor, err := strconv.Atoi(renderMinorStr); err == nil {
if uniqueId, exists := renderDevIds[renderMinor]; exists {
// Convert decimal unique_id to hex format (without 0x prefix)
if uniqueIdInt, err := strconv.ParseUint(uniqueId, 10, 64); err == nil {
hexUniqueId := fmt.Sprintf("0x%x", uniqueIdInt)
uniqueIdToIndex[hexUniqueId] = append(uniqueIdToIndex[hexUniqueId], deviceIndex)
// Also support without 0x prefix
hexUniqueIdNoPrefix := fmt.Sprintf("%x", uniqueIdInt)
uniqueIdToIndex[hexUniqueIdNoPrefix] = append(uniqueIdToIndex[hexUniqueIdNoPrefix], deviceIndex)
}
}
}
break // Only need one render device per group
}
}
}
return uniqueIdToIndex, nil
}
......@@ -7,6 +7,7 @@ package modifier
import (
"dtk-container-toolkit/internal/config"
"dtk-container-toolkit/internal/config/image"
"dtk-container-toolkit/internal/dcu-tracker"
"dtk-container-toolkit/internal/discover"
"dtk-container-toolkit/internal/logger"
"dtk-container-toolkit/internal/lookup"
......@@ -22,12 +23,23 @@ import (
// The value of the DTK_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.DTK, driver *root.Driver, isMount bool) (oci.SpecModifier, error) {
dtkCDIHookPath := cfg.DTKCTKConfig.Path
value := containerImage.Getenv(image.EnvVarDTKVisibleDevices)
if len(value) > 0 {
dcuTracker, err := dcuTracker.New()
if err == nil {
_, err = dcuTracker.ReserveDCUs(value, containerImage.ContainerId)
logger.Infof("ReserveDCUs %s", value)
if err != nil {
return nil, fmt.Errorf("failed to reserve DCUs: %v", err)
}
}
}
comDiscoverer, err := discover.NewCommonHCUDiscoverer(
logger,
dtkCDIHookPath,
driver,
isMount,
containerImage,
)
if err != nil {
return nil, fmt.Errorf("failed to create mounts discoverer: %v", err)
......
......@@ -36,7 +36,7 @@ func newDTKContainerRuntime(logger logger.Interface, cfg *config.Config, argv []
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
}
specModifier, err := newSpecModifier(logger, cfg, ociSpec, driver)
specModifier, err := newSpecModifier(logger, cfg, ociSpec, driver, argv[len(argv) - 1])
if err != nil {
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
}
......@@ -53,7 +53,7 @@ func newDTKContainerRuntime(logger logger.Interface, cfg *config.Config, argv []
}
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver, containerId string) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
......@@ -63,7 +63,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
if err != nil {
return nil, err
}
image.ContainerId = containerId
mode := info.ResolveAutoMode(logger, cfg.DTKContainerRuntimeConfig.Mode, image)
// We update the mode here so that we can continue passing just the config to other functions.
cfg.DTKContainerRuntimeConfig.Mode = mode
......
......@@ -8,6 +8,7 @@ import (
"dtk-container-toolkit/internal/discover"
"dtk-container-toolkit/internal/edits"
"dtk-container-toolkit/internal/platform-support/dhcu"
"dtk-container-toolkit/internal/config/image"
"dtk-container-toolkit/pkg/c3000cdi/spec"
"dtk-container-toolkit/pkg/go-c3000lib/pkg/device"
"fmt"
......@@ -155,6 +156,7 @@ func (l *c3000smilib) newCommonC3000SmiDiscoverer() (discover.Discover, error) {
l.dtkCDIHookPath,
l.driver,
true,
image.DTK{},
)
return d, err
......
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment