Commit 4b9ce81e authored by songlinfeng's avatar songlinfeng 💬
Browse files

support vDCU

parent 2477e403
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="AutoImportSettings">
<option name="autoReloadType" value="ALL" />
</component>
<component name="ChangeListManager">
<list default="true" id="2e903f06-2eda-40fc-a9ea-e83b0a0df370" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/internal/config/image/dtk_image.go" beforeDir="false" afterPath="$PROJECT_DIR$/internal/config/image/dtk_image.go" afterDir="false" />
<change beforePath="$PROJECT_DIR$/internal/config/image/envvars.go" beforeDir="false" afterPath="$PROJECT_DIR$/internal/config/image/envvars.go" afterDir="false" />
<change beforePath="$PROJECT_DIR$/internal/discover/graphics.go" beforeDir="false" afterPath="$PROJECT_DIR$/internal/discover/graphics.go" afterDir="false" />
<change beforePath="$PROJECT_DIR$/internal/discover/mounts.go" beforeDir="false" afterPath="$PROJECT_DIR$/internal/discover/mounts.go" afterDir="false" />
<change beforePath="$PROJECT_DIR$/internal/modifier/gated.go" beforeDir="false" afterPath="$PROJECT_DIR$/internal/modifier/gated.go" afterDir="false" />
<change beforePath="$PROJECT_DIR$/internal/modifier/graphics.go" beforeDir="false" afterPath="$PROJECT_DIR$/internal/modifier/graphics.go" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Go File" />
</list>
</option>
</component>
<component name="GOROOT" url="file://$PROJECT_DIR$/../sdk/go1.25.3" />
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="MarkdownSettingsMigration">
<option name="stateVersion" value="1" />
</component>
<component name="ProjectColorInfo">{
&quot;associatedIndex&quot;: 7
}</component>
<component name="ProjectId" id="34gVepsnW85Ws6mlzFOA01LkydX" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent"><![CDATA[{
"keyToString": {
"DefaultGoTemplateProperty": "Go File",
"ModuleVcsDetector.initialDetectionPerformed": "true",
"RunOnceActivity.GoLinterPluginOnboarding": "true",
"RunOnceActivity.GoLinterPluginStorageMigration": "true",
"RunOnceActivity.OpenProjectViewOnStart": "true",
"RunOnceActivity.ShowReadmeOnStart": "true",
"RunOnceActivity.git.unshallow": "true",
"RunOnceActivity.go.formatter.settings.were.checked": "true",
"RunOnceActivity.go.migrated.go.modules.settings": "true",
"RunOnceActivity.go.modules.automatic.dependencies.download": "true",
"RunOnceActivity.go.modules.go.list.on.any.changes.was.set": "true",
"WebServerToolWindowFactoryState": "false",
"git-widget-placeholder": "main",
"go.import.settings.migrated": "true",
"go.sdk.automatically.set": "true",
"last_opened_file_path": "D:/container-toolkit",
"node.js.detected.package.eslint": "true",
"node.js.selected.package.eslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
"settings.editor.selected.configurable": "preferences.pluginManager",
"vue.rearranger.settings.migration": "true"
}
}]]></component>
<component name="SharedIndexes">
<attachedChunks>
<set>
<option value="bundled-gosdk-f466f9b0953e-146d08934cbf-org.jetbrains.plugins.go.sharedIndexes.bundled-GO-252.26830.102" />
<option value="bundled-js-predefined-d6986cc7102b-3aa1da707db6-JavaScript-GO-252.26830.102" />
</set>
</attachedChunks>
</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="默认任务">
<changelist id="2e903f06-2eda-40fc-a9ea-e83b0a0df370" name="更改" comment="" />
<created>1761635300250</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1761635300250</updated>
</task>
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="3" />
</component>
<component name="VgoProject">
<settings-migrated>true</settings-migrated>
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" type="DlvLineBreakpoint">
<url>file://$PROJECT_DIR$/pkg/c3000cdi/lib-c3000smi.go</url>
<line>158</line>
<option name="timeStamp" value="1" />
</line-breakpoint>
<line-breakpoint enabled="true" type="DlvLineBreakpoint">
<url>file://$PROJECT_DIR$/internal/dcu-tracker/dcu-tracker.go</url>
<line>2</line>
<option name="timeStamp" value="2" />
</line-breakpoint>
<line-breakpoint enabled="true" type="DlvLineBreakpoint">
<url>file://$PROJECT_DIR$/cmd/dcu-ctk/docker/docker.go</url>
<line>37</line>
<option name="timeStamp" value="3" />
</line-breakpoint>
<line-breakpoint enabled="true" type="DlvLineBreakpoint">
<url>file://$PROJECT_DIR$/cmd/dcu-ctk/runtime/configure/configure.go</url>
<line>205</line>
<option name="timeStamp" value="4" />
</line-breakpoint>
<line-breakpoint enabled="true" type="DlvLineBreakpoint">
<url>file://$PROJECT_DIR$/internal/oci/spec_file.go</url>
<line>6</line>
<option name="timeStamp" value="5" />
</line-breakpoint>
<line-breakpoint enabled="true" type="DlvLineBreakpoint">
<url>file://$PROJECT_DIR$/internal/modifier/sysfs.go</url>
<line>6</line>
<option name="timeStamp" value="6" />
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
</project>
\ No newline at end of file
...@@ -5,12 +5,15 @@ ...@@ -5,12 +5,15 @@
package image package image
import ( import (
"bufio"
"dcu-container-toolkit/internal/hydcu"
"fmt" "fmt"
"github.com/opencontainers/runtime-spec/specs-go"
"os"
"path/filepath" "path/filepath"
"strings" "regexp"
"dcu-container-toolkit/internal/hydcu"
"strconv" "strconv"
"github.com/opencontainers/runtime-spec/specs-go" "strings"
"tags.cncf.io/container-device-interface/pkg/parser" "tags.cncf.io/container-device-interface/pkg/parser"
) )
...@@ -18,9 +21,9 @@ import ( ...@@ -18,9 +21,9 @@ import (
// a map of environment variable to values that can be used to perform lookups // a map of environment variable to values that can be used to perform lookups
// such as requirements. // such as requirements.
type DTK struct { type DTK struct {
env map[string]string env map[string]string
mounts []specs.Mount mounts []specs.Mount
ContainerId string ContainerId string
} }
// NewDTKImageFromSpec creates a DTK image from the input OCI runtime spec. // NewDTKImageFromSpec creates a DTK image from the input OCI runtime spec.
...@@ -222,3 +225,43 @@ func (i DTK) CDIDevicesFromMounts() []string { ...@@ -222,3 +225,43 @@ func (i DTK) CDIDevicesFromMounts() []string {
func (i DTK) VisibleDevicesFromEnvVar() []string { func (i DTK) VisibleDevicesFromEnvVar() []string {
return i.DevicesFromEnvvars(EnvVarDTKVisibleDevices, EnvVarNvidiaVisibleDevices).List() return i.DevicesFromEnvvars(EnvVarDTKVisibleDevices, EnvVarNvidiaVisibleDevices).List()
} }
// vdcuPciBusIDPattern matches a "PciBusId: <domain>:<bus>:<device>.<function>"
// entry and captures the PCI address. It is compiled once at package scope
// instead of on every configuration file read.
var vdcuPciBusIDPattern = regexp.MustCompile(`PciBusId:\s*([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])`)

// VdcuFromEnv resolves the vDCU indexes listed in the environment variable
// envVar to PCI bus IDs.
//
// The variable's value is treated as a comma-separated list of indexes. For
// each index the file /etc/vdev/vdev<index>.conf is read and the first
// PciBusId entry found in it is appended to the result. Indexes whose file
// cannot be opened or contains no PciBusId entry are skipped. A nil slice is
// returned when the variable is unset or nothing could be resolved.
func (i DTK) VdcuFromEnv(envVar string) []string {
	value, ok := i.env[envVar]
	if !ok {
		return nil
	}

	var vdcuDevices []string
	for _, index := range strings.Split(value, ",") {
		index = strings.TrimSpace(index)
		// The value originates from the container's environment. Skip empty
		// tokens (e.g. a trailing comma) and refuse anything carrying a path
		// separator so the lookup cannot leave /etc/vdev.
		if index == "" || strings.ContainsAny(index, `/\`) {
			continue
		}
		if pciID := vdcuPciBusID(fmt.Sprintf("/etc/vdev/vdev%s.conf", index)); pciID != "" {
			vdcuDevices = append(vdcuDevices, pciID)
		}
	}
	return vdcuDevices
}

// vdcuPciBusID returns the first PciBusId address found in the file at path.
// It returns "" when the file cannot be opened, cannot be read up to a
// matching line, or contains no PciBusId entry.
func vdcuPciBusID(path string) string {
	file, err := os.Open(path)
	if err != nil {
		return ""
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// Index 1 is the captured address; index 0 is the whole match.
		if matches := vdcuPciBusIDPattern.FindStringSubmatch(scanner.Text()); len(matches) > 1 {
			return matches[1]
		}
	}
	return ""
}
// VdcuVisibleDevicesFromEnvVar returns whatever VdcuFromEnv resolves for the
// EnvVarVDTKVisibleDevices environment variable.
func (i DTK) VdcuVisibleDevicesFromEnvVar() []string {
	devices := i.VdcuFromEnv(EnvVarVDTKVisibleDevices)
	return devices
}
...@@ -8,4 +8,5 @@ const ( ...@@ -8,4 +8,5 @@ const (
EnvVarDTKVisibleDevices = "DCU_VISIBLE_DEVICES" EnvVarDTKVisibleDevices = "DCU_VISIBLE_DEVICES"
EnvVarNvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES" EnvVarNvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
EnvROCmVersion = "ROCM_VERSION" EnvROCmVersion = "ROCM_VERSION"
EnvVarVDTKVisibleDevices = "VDCU_VISIBLE_DEVICES"
) )
...@@ -118,10 +118,18 @@ func NewCommonHCUDiscoverer(logger logger.Interface, dtkCDIHookPath string, driv ...@@ -118,10 +118,18 @@ func NewCommonHCUDiscoverer(logger logger.Interface, dtkCDIHookPath string, driv
var trackHook Hook var trackHook Hook
value := containerImage.Getenv(image.EnvVarDTKVisibleDevices) value := containerImage.Getenv(image.EnvVarDTKVisibleDevices)
value1 := containerImage.Getenv(image.EnvVarNvidiaVisibleDevices) value1 := containerImage.Getenv(image.EnvVarNvidiaVisibleDevices)
if len(value) > 0 || len(value1) > 0{ value2 := containerImage.Getenv(image.EnvVarVDTKVisibleDevices)
if len(value) > 0 || len(value1) > 0 {
trackHook = CreateTrackHook(dtkCDIHookPath, containerImage.ContainerId) trackHook = CreateTrackHook(dtkCDIHookPath, containerImage.ContainerId)
} }
if len(value2) > 0 {
m, ok := libraries.(*mounts)
if ok {
m.addVdcu(value2)
}
}
var d Discover var d Discover
if trackHook.Lifecycle == "" { if trackHook.Lifecycle == "" {
d = Merge( d = Merge(
......
...@@ -48,9 +48,9 @@ func (d *mounts) Mounts() ([]Mount, error) { ...@@ -48,9 +48,9 @@ func (d *mounts) Mounts() ([]Mount, error) {
return nil, fmt.Errorf("no lookup defined") return nil, fmt.Errorf("no lookup defined")
} }
var temps []Mount
if d.cache != nil { if d.cache != nil {
d.logger.Debugf("returning cached mounts") temps = d.cache
return d.cache, nil
} }
d.Lock() d.Lock()
...@@ -101,6 +101,10 @@ func (d *mounts) Mounts() ([]Mount, error) { ...@@ -101,6 +101,10 @@ func (d *mounts) Mounts() ([]Mount, error) {
for _, m := range uniqueMounts { for _, m := range uniqueMounts {
mounts = append(mounts, m) mounts = append(mounts, m)
} }
for _, item := range temps {
mounts = append(mounts, item)
}
d.cache = mounts d.cache = mounts
return d.cache, nil return d.cache, nil
...@@ -114,3 +118,20 @@ func (d *mounts) relativeTo(path string) string { ...@@ -114,3 +118,20 @@ func (d *mounts) relativeTo(path string) string {
return strings.TrimPrefix(path, d.root) return strings.TrimPrefix(path, d.root)
} }
// addVdcu builds read-only bind mounts for the vDCU configuration files named
// by indexs, a comma-separated list of vDCU indexes, and stores them in
// d.cache, replacing its previous contents.
//
// For each index the host file /etc/vdev/vdev<index>.conf is mapped to
// /etc/vdev/docker/vdev<index>.conf. Empty entries and entries containing a
// path separator are ignored; if no valid entry remains, d.cache is set to nil.
func (d *mounts) addVdcu(indexs string) {
	var vdcuMounts []Mount
	for _, index := range strings.Split(indexs, ",") {
		index = strings.TrimSpace(index)
		// The list comes from the container's environment and ends up in a
		// host bind-mount source path: drop empty tokens (e.g. a trailing
		// comma) and anything that could point outside /etc/vdev.
		if index == "" || strings.ContainsAny(index, `/\`) {
			continue
		}
		vdcuMounts = append(vdcuMounts, Mount{
			HostPath: fmt.Sprintf("/etc/vdev/vdev%s.conf", index),
			Path:     fmt.Sprintf("/etc/vdev/docker/vdev%s.conf", index),
			Options: []string{
				"rbind",
				"ro",
				"rprivate",
			},
		})
	}
	d.cache = vdcuMounts
}
...@@ -22,8 +22,10 @@ import ( ...@@ -22,8 +22,10 @@ import (
// If no devices are selected, no changes are made. // If no devices are selected, no changes are made.
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.DTK, driver *root.Driver) (oci.SpecModifier, error) { func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.DTK, driver *root.Driver) (oci.SpecModifier, error) {
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 { if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
logger.Infof("No modification required; no devices requested") if devices = image.VdcuVisibleDevicesFromEnvVar(); len(devices) == 0 {
return nil, nil logger.Infof("No modification required; no devices requested")
return nil, nil
}
} }
var discoverers []discover.Discover var discoverers []discover.Discover
......
...@@ -15,6 +15,7 @@ import ( ...@@ -15,6 +15,7 @@ import (
"dcu-container-toolkit/internal/oci" "dcu-container-toolkit/internal/oci"
"fmt" "fmt"
"path/filepath" "path/filepath"
"slices"
"sort" "sort"
"strconv" "strconv"
) )
...@@ -23,31 +24,31 @@ import ( ...@@ -23,31 +24,31 @@ import (
// The value of the DTK_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made. // The value of the DTK_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.DTK, driver *root.Driver, isMount bool) (oci.SpecModifier, error) { func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.DTK, driver *root.Driver, isMount bool) (oci.SpecModifier, error) {
dtkCDIHookPath := cfg.DTKCTKConfig.Path dtkCDIHookPath := cfg.DTKCTKConfig.Path
value := containerImage.Getenv(image.EnvVarDTKVisibleDevices) value := containerImage.Getenv(image.EnvVarDTKVisibleDevices)
if len(value) > 0 { if len(value) > 0 {
dcuTracker, err := dcuTracker.New() dcuTracker, err := dcuTracker.New()
if err == nil { if err == nil {
_, err = dcuTracker.ReserveDCUs(value, containerImage.ContainerId) _, err = dcuTracker.ReserveDCUs(value, containerImage.ContainerId)
logger.Infof("ReserveDCUs %s", value) logger.Infof("ReserveDCUs %s", value)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to reserve DCUs: %v", err) return nil, fmt.Errorf("failed to reserve DCUs: %v", err)
}
} }
}
} }
value = containerImage.Getenv(image.EnvVarNvidiaVisibleDevices) value = containerImage.Getenv(image.EnvVarNvidiaVisibleDevices)
if len(value) > 0 { if len(value) > 0 {
dcuTracker, err := dcuTracker.New() dcuTracker, err := dcuTracker.New()
if err == nil { if err == nil {
_, err = dcuTracker.ReserveDCUs(value, containerImage.ContainerId) _, err = dcuTracker.ReserveDCUs(value, containerImage.ContainerId)
logger.Infof("ReserveDCUs %s", value) logger.Infof("ReserveDCUs %s", value)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to reserve DCUs: %v", err) return nil, fmt.Errorf("failed to reserve DCUs: %v", err)
} }
} }
} }
comDiscoverer, err := discover.NewCommonHCUDiscoverer( comDiscoverer, err := discover.NewCommonHCUDiscoverer(
logger, logger,
dtkCDIHookPath, dtkCDIHookPath,
...@@ -60,7 +61,9 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI ...@@ -60,7 +61,9 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI
} }
visibleDevices := containerImage.DevicesFromEnvvars(image.EnvVarDTKVisibleDevices, image.EnvVarNvidiaVisibleDevices) visibleDevices := containerImage.DevicesFromEnvvars(image.EnvVarDTKVisibleDevices, image.EnvVarNvidiaVisibleDevices)
if len(visibleDevices.List()) == 0 { var vdcuDevices = containerImage.VdcuFromEnv(image.EnvVarVDTKVisibleDevices)
if len(visibleDevices.List()) > 0 && len(vdcuDevices) > 0 {
logger.Info("No devices requested") logger.Info("No devices requested")
return nil, nil return nil, nil
} }
...@@ -83,6 +86,11 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI ...@@ -83,6 +86,11 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI
} }
} }
for _, vdcuDevice := range vdcuDevices {
if !slices.Contains(selectedBusIds, vdcuDevice) {
selectedBusIds = append(selectedBusIds, vdcuDevice)
}
}
// In standard usage, the devRoot is the same as the driver.Root. // In standard usage, the devRoot is the same as the driver.Root.
devRoot := driver.Root devRoot := driver.Root
drmNodes, err := discover.NewDRMNodesDiscoverer( drmNodes, err := discover.NewDRMNodesDiscoverer(
...@@ -125,7 +133,7 @@ func getDevicesFromDriver() ([]string, error) { ...@@ -125,7 +133,7 @@ func getDevicesFromDriver() ([]string, error) {
if err != nil { if err != nil {
return devices, fmt.Errorf("failed to find devices bus id: %v", err) return devices, fmt.Errorf("failed to find devices bus id: %v", err)
} }
if len(matches) == 0 { if len(matches) == 0 {
m, err := filepath.Glob("/sys/module/hy*cu/drivers/pci:amdgpu/[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]:*") m, err := filepath.Glob("/sys/module/hy*cu/drivers/pci:amdgpu/[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]:*")
if err != nil { if err != nil {
return devices, fmt.Errorf("failed to find devices bus id: %v", err) return devices, fmt.Errorf("failed to find devices bus id: %v", err)
...@@ -133,8 +141,6 @@ func getDevicesFromDriver() ([]string, error) { ...@@ -133,8 +141,6 @@ func getDevicesFromDriver() ([]string, error) {
matches = append(matches, m...) matches = append(matches, m...)
} }
for _, path := range sort.StringSlice(matches) { for _, path := range sort.StringSlice(matches) {
devices = append(devices, filepath.Base(path)) devices = append(devices, filepath.Base(path))
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment