Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
songlinfeng
container-toolkit
Commits
4b9ce81e
"torchvision/csrc/ops/deform_conv2d.cpp" did not exist on "52b8685bde554501604471337a11578fdf026027"
Commit
4b9ce81e
authored
Dec 03, 2025
by
songlinfeng
💬
Browse files
support vDCU
parent
2477e403
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
245 additions
and
36 deletions
+245
-36
.idea/workspace.xml
.idea/workspace.xml
+128
-0
internal/config/image/dtk_image.go
internal/config/image/dtk_image.go
+49
-6
internal/config/image/envvars.go
internal/config/image/envvars.go
+1
-0
internal/discover/graphics.go
internal/discover/graphics.go
+9
-1
internal/discover/mounts.go
internal/discover/mounts.go
+23
-2
internal/modifier/gated.go
internal/modifier/gated.go
+4
-2
internal/modifier/graphics.go
internal/modifier/graphics.go
+31
-25
No files found.
.idea/workspace.xml
0 → 100644
View file @
4b9ce81e
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"AutoImportSettings"
>
<option
name=
"autoReloadType"
value=
"ALL"
/>
</component>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"2e903f06-2eda-40fc-a9ea-e83b0a0df370"
name=
"更改"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/internal/config/image/dtk_image.go"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/internal/config/image/dtk_image.go"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/internal/config/image/envvars.go"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/internal/config/image/envvars.go"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/internal/discover/graphics.go"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/internal/discover/graphics.go"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/internal/discover/mounts.go"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/internal/discover/mounts.go"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/internal/modifier/gated.go"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/internal/modifier/gated.go"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/internal/modifier/graphics.go"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/internal/modifier/graphics.go"
afterDir=
"false"
/>
</list>
<option
name=
"SHOW_DIALOG"
value=
"false"
/>
<option
name=
"HIGHLIGHT_CONFLICTS"
value=
"true"
/>
<option
name=
"HIGHLIGHT_NON_ACTIVE_CHANGELIST"
value=
"false"
/>
<option
name=
"LAST_RESOLUTION"
value=
"IGNORE"
/>
</component>
<component
name=
"FileTemplateManagerImpl"
>
<option
name=
"RECENT_TEMPLATES"
>
<list>
<option
value=
"Go File"
/>
</list>
</option>
</component>
<component
name=
"GOROOT"
url=
"file://$PROJECT_DIR$/../sdk/go1.25.3"
/>
<component
name=
"Git.Settings"
>
<option
name=
"RECENT_GIT_ROOT_PATH"
value=
"$PROJECT_DIR$"
/>
</component>
<component
name=
"MarkdownSettingsMigration"
>
<option
name=
"stateVersion"
value=
"1"
/>
</component>
<component
name=
"ProjectColorInfo"
>
{
"
associatedIndex
"
: 7
}
</component>
<component
name=
"ProjectId"
id=
"34gVepsnW85Ws6mlzFOA01LkydX"
/>
<component
name=
"ProjectViewState"
>
<option
name=
"hideEmptyMiddlePackages"
value=
"true"
/>
<option
name=
"showLibraryContents"
value=
"true"
/>
</component>
<component
name=
"PropertiesComponent"
>
<![CDATA[{
"keyToString": {
"DefaultGoTemplateProperty": "Go File",
"ModuleVcsDetector.initialDetectionPerformed": "true",
"RunOnceActivity.GoLinterPluginOnboarding": "true",
"RunOnceActivity.GoLinterPluginStorageMigration": "true",
"RunOnceActivity.OpenProjectViewOnStart": "true",
"RunOnceActivity.ShowReadmeOnStart": "true",
"RunOnceActivity.git.unshallow": "true",
"RunOnceActivity.go.formatter.settings.were.checked": "true",
"RunOnceActivity.go.migrated.go.modules.settings": "true",
"RunOnceActivity.go.modules.automatic.dependencies.download": "true",
"RunOnceActivity.go.modules.go.list.on.any.changes.was.set": "true",
"WebServerToolWindowFactoryState": "false",
"git-widget-placeholder": "main",
"go.import.settings.migrated": "true",
"go.sdk.automatically.set": "true",
"last_opened_file_path": "D:/container-toolkit",
"node.js.detected.package.eslint": "true",
"node.js.selected.package.eslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
"settings.editor.selected.configurable": "preferences.pluginManager",
"vue.rearranger.settings.migration": "true"
}
}]]>
</component>
<component
name=
"SharedIndexes"
>
<attachedChunks>
<set>
<option
value=
"bundled-gosdk-f466f9b0953e-146d08934cbf-org.jetbrains.plugins.go.sharedIndexes.bundled-GO-252.26830.102"
/>
<option
value=
"bundled-js-predefined-d6986cc7102b-3aa1da707db6-JavaScript-GO-252.26830.102"
/>
</set>
</attachedChunks>
</component>
<component
name=
"SpellCheckerSettings"
RuntimeDictionaries=
"0"
Folders=
"0"
CustomDictionaries=
"0"
DefaultDictionary=
"应用程序级"
UseSingleDictionary=
"true"
transferred=
"true"
/>
<component
name=
"TaskManager"
>
<task
active=
"true"
id=
"Default"
summary=
"默认任务"
>
<changelist
id=
"2e903f06-2eda-40fc-a9ea-e83b0a0df370"
name=
"更改"
comment=
""
/>
<created>
1761635300250
</created>
<option
name=
"number"
value=
"Default"
/>
<option
name=
"presentableId"
value=
"Default"
/>
<updated>
1761635300250
</updated>
</task>
<servers
/>
</component>
<component
name=
"TypeScriptGeneratedFilesManager"
>
<option
name=
"version"
value=
"3"
/>
</component>
<component
name=
"VgoProject"
>
<settings-migrated>
true
</settings-migrated>
</component>
<component
name=
"XDebuggerManager"
>
<breakpoint-manager>
<breakpoints>
<line-breakpoint
enabled=
"true"
type=
"DlvLineBreakpoint"
>
<url>
file://$PROJECT_DIR$/pkg/c3000cdi/lib-c3000smi.go
</url>
<line>
158
</line>
<option
name=
"timeStamp"
value=
"1"
/>
</line-breakpoint>
<line-breakpoint
enabled=
"true"
type=
"DlvLineBreakpoint"
>
<url>
file://$PROJECT_DIR$/internal/dcu-tracker/dcu-tracker.go
</url>
<line>
2
</line>
<option
name=
"timeStamp"
value=
"2"
/>
</line-breakpoint>
<line-breakpoint
enabled=
"true"
type=
"DlvLineBreakpoint"
>
<url>
file://$PROJECT_DIR$/cmd/dcu-ctk/docker/docker.go
</url>
<line>
37
</line>
<option
name=
"timeStamp"
value=
"3"
/>
</line-breakpoint>
<line-breakpoint
enabled=
"true"
type=
"DlvLineBreakpoint"
>
<url>
file://$PROJECT_DIR$/cmd/dcu-ctk/runtime/configure/configure.go
</url>
<line>
205
</line>
<option
name=
"timeStamp"
value=
"4"
/>
</line-breakpoint>
<line-breakpoint
enabled=
"true"
type=
"DlvLineBreakpoint"
>
<url>
file://$PROJECT_DIR$/internal/oci/spec_file.go
</url>
<line>
6
</line>
<option
name=
"timeStamp"
value=
"5"
/>
</line-breakpoint>
<line-breakpoint
enabled=
"true"
type=
"DlvLineBreakpoint"
>
<url>
file://$PROJECT_DIR$/internal/modifier/sysfs.go
</url>
<line>
6
</line>
<option
name=
"timeStamp"
value=
"6"
/>
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
</project>
\ No newline at end of file
internal/config/image/dtk_image.go
View file @
4b9ce81e
...
...
@@ -5,12 +5,15 @@
package
image
import
(
"bufio"
"dcu-container-toolkit/internal/hydcu"
"fmt"
"github.com/opencontainers/runtime-spec/specs-go"
"os"
"path/filepath"
"strings"
"dcu-container-toolkit/internal/hydcu"
"regexp"
"strconv"
"
github.com/opencontainers/runtime-spec/specs-go
"
"
strings
"
"tags.cncf.io/container-device-interface/pkg/parser"
)
...
...
@@ -18,9 +21,9 @@ import (
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type
DTK
struct
{
env
map
[
string
]
string
mounts
[]
specs
.
Mount
ContainerId
string
env
map
[
string
]
string
mounts
[]
specs
.
Mount
ContainerId
string
}
// NewDTKImageFromSpec creates a DTK image from the input OCI runtime spec.
...
...
@@ -222,3 +225,43 @@ func (i DTK) CDIDevicesFromMounts() []string {
func
(
i
DTK
)
VisibleDevicesFromEnvVar
()
[]
string
{
return
i
.
DevicesFromEnvvars
(
EnvVarDTKVisibleDevices
,
EnvVarNvidiaVisibleDevices
)
.
List
()
}
func
(
i
DTK
)
VdcuFromEnv
(
envVar
string
)
[]
string
{
getPci
:=
func
(
path
string
)
string
{
var
pciIds
[]
string
pciRegex
:=
regexp
.
MustCompile
(
`PciBusId:\s*([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])`
)
file
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
return
""
}
defer
file
.
Close
()
scanner
:=
bufio
.
NewScanner
(
file
)
for
scanner
.
Scan
()
{
line
:=
scanner
.
Text
()
matches
:=
pciRegex
.
FindStringSubmatch
(
line
)
if
len
(
matches
)
>
0
{
pciIds
=
append
(
pciIds
,
matches
[
1
])
}
}
if
len
(
pciIds
)
>
0
{
return
pciIds
[
0
]
}
return
""
}
var
vdcuDevices
[]
string
if
value
,
ok
:=
i
.
env
[
envVar
];
ok
{
for
_
,
index
:=
range
strings
.
Split
(
value
,
","
)
{
var
path
=
fmt
.
Sprintf
(
"/etc/vdev/vdev%s.conf"
,
index
)
var
pciId
=
getPci
(
path
)
if
len
(
pciId
)
>
0
{
vdcuDevices
=
append
(
vdcuDevices
,
pciId
)
}
}
}
return
vdcuDevices
}
func
(
i
DTK
)
VdcuVisibleDevicesFromEnvVar
()
[]
string
{
return
i
.
VdcuFromEnv
(
EnvVarVDTKVisibleDevices
)
}
internal/config/image/envvars.go
View file @
4b9ce81e
...
...
@@ -8,4 +8,5 @@ const (
EnvVarDTKVisibleDevices
=
"DCU_VISIBLE_DEVICES"
EnvVarNvidiaVisibleDevices
=
"NVIDIA_VISIBLE_DEVICES"
EnvROCmVersion
=
"ROCM_VERSION"
EnvVarVDTKVisibleDevices
=
"VDCU_VISIBLE_DEVICES"
)
internal/discover/graphics.go
View file @
4b9ce81e
...
...
@@ -118,10 +118,18 @@ func NewCommonHCUDiscoverer(logger logger.Interface, dtkCDIHookPath string, driv
var
trackHook
Hook
value
:=
containerImage
.
Getenv
(
image
.
EnvVarDTKVisibleDevices
)
value1
:=
containerImage
.
Getenv
(
image
.
EnvVarNvidiaVisibleDevices
)
if
len
(
value
)
>
0
||
len
(
value1
)
>
0
{
value2
:=
containerImage
.
Getenv
(
image
.
EnvVarVDTKVisibleDevices
)
if
len
(
value
)
>
0
||
len
(
value1
)
>
0
{
trackHook
=
CreateTrackHook
(
dtkCDIHookPath
,
containerImage
.
ContainerId
)
}
if
len
(
value2
)
>
0
{
m
,
ok
:=
libraries
.
(
*
mounts
)
if
ok
{
m
.
addVdcu
(
value2
)
}
}
var
d
Discover
if
trackHook
.
Lifecycle
==
""
{
d
=
Merge
(
...
...
internal/discover/mounts.go
View file @
4b9ce81e
...
...
@@ -48,9 +48,9 @@ func (d *mounts) Mounts() ([]Mount, error) {
return
nil
,
fmt
.
Errorf
(
"no lookup defined"
)
}
var
temps
[]
Mount
if
d
.
cache
!=
nil
{
d
.
logger
.
Debugf
(
"returning cached mounts"
)
return
d
.
cache
,
nil
temps
=
d
.
cache
}
d
.
Lock
()
...
...
@@ -101,6 +101,10 @@ func (d *mounts) Mounts() ([]Mount, error) {
for
_
,
m
:=
range
uniqueMounts
{
mounts
=
append
(
mounts
,
m
)
}
for
_
,
item
:=
range
temps
{
mounts
=
append
(
mounts
,
item
)
}
d
.
cache
=
mounts
return
d
.
cache
,
nil
...
...
@@ -114,3 +118,20 @@ func (d *mounts) relativeTo(path string) string {
return
strings
.
TrimPrefix
(
path
,
d
.
root
)
}
func
(
d
*
mounts
)
addVdcu
(
indexs
string
)
{
var
mounts
[]
Mount
for
_
,
index
:=
range
strings
.
Split
(
indexs
,
","
)
{
var
mount
=
Mount
{
HostPath
:
fmt
.
Sprintf
(
"/etc/vdev/vdev%s.conf"
,
index
),
Path
:
fmt
.
Sprintf
(
"/etc/vdev/docker/vdev%s.conf"
,
index
),
Options
:
[]
string
{
"rbind"
,
"ro"
,
"rprivate"
,
},
}
mounts
=
append
(
mounts
,
mount
)
}
d
.
cache
=
mounts
}
internal/modifier/gated.go
View file @
4b9ce81e
...
...
@@ -22,8 +22,10 @@ import (
// If no devices are selected, no changes are made.
func
NewFeatureGatedModifier
(
logger
logger
.
Interface
,
cfg
*
config
.
Config
,
image
image
.
DTK
,
driver
*
root
.
Driver
)
(
oci
.
SpecModifier
,
error
)
{
if
devices
:=
image
.
VisibleDevicesFromEnvVar
();
len
(
devices
)
==
0
{
logger
.
Infof
(
"No modification required; no devices requested"
)
return
nil
,
nil
if
devices
=
image
.
VdcuVisibleDevicesFromEnvVar
();
len
(
devices
)
==
0
{
logger
.
Infof
(
"No modification required; no devices requested"
)
return
nil
,
nil
}
}
var
discoverers
[]
discover
.
Discover
...
...
internal/modifier/graphics.go
View file @
4b9ce81e
...
...
@@ -15,6 +15,7 @@ import (
"dcu-container-toolkit/internal/oci"
"fmt"
"path/filepath"
"slices"
"sort"
"strconv"
)
...
...
@@ -23,31 +24,31 @@ import (
// The value of the DTK_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
func
NewGraphicsModifier
(
logger
logger
.
Interface
,
cfg
*
config
.
Config
,
containerImage
image
.
DTK
,
driver
*
root
.
Driver
,
isMount
bool
)
(
oci
.
SpecModifier
,
error
)
{
dtkCDIHookPath
:=
cfg
.
DTKCTKConfig
.
Path
value
:=
containerImage
.
Getenv
(
image
.
EnvVarDTKVisibleDevices
)
value
:=
containerImage
.
Getenv
(
image
.
EnvVarDTKVisibleDevices
)
if
len
(
value
)
>
0
{
dcuTracker
,
err
:=
dcuTracker
.
New
()
if
err
==
nil
{
_
,
err
=
dcuTracker
.
ReserveDCUs
(
value
,
containerImage
.
ContainerId
)
logger
.
Infof
(
"ReserveDCUs %s"
,
value
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to reserve DCUs: %v"
,
err
)
dcuTracker
,
err
:=
dcuTracker
.
New
()
if
err
==
nil
{
_
,
err
=
dcuTracker
.
ReserveDCUs
(
value
,
containerImage
.
ContainerId
)
logger
.
Infof
(
"ReserveDCUs %s"
,
value
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to reserve DCUs: %v"
,
err
)
}
}
}
}
value
=
containerImage
.
Getenv
(
image
.
EnvVarNvidiaVisibleDevices
)
if
len
(
value
)
>
0
{
dcuTracker
,
err
:=
dcuTracker
.
New
()
if
err
==
nil
{
_
,
err
=
dcuTracker
.
ReserveDCUs
(
value
,
containerImage
.
ContainerId
)
logger
.
Infof
(
"ReserveDCUs %s"
,
value
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to reserve DCUs: %v"
,
err
)
}
}
}
value
=
containerImage
.
Getenv
(
image
.
EnvVarNvidiaVisibleDevices
)
if
len
(
value
)
>
0
{
dcuTracker
,
err
:=
dcuTracker
.
New
()
if
err
==
nil
{
_
,
err
=
dcuTracker
.
ReserveDCUs
(
value
,
containerImage
.
ContainerId
)
logger
.
Infof
(
"ReserveDCUs %s"
,
value
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to reserve DCUs: %v"
,
err
)
}
}
}
comDiscoverer
,
err
:=
discover
.
NewCommonHCUDiscoverer
(
logger
,
dtkCDIHookPath
,
...
...
@@ -60,7 +61,9 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI
}
visibleDevices
:=
containerImage
.
DevicesFromEnvvars
(
image
.
EnvVarDTKVisibleDevices
,
image
.
EnvVarNvidiaVisibleDevices
)
if
len
(
visibleDevices
.
List
())
==
0
{
var
vdcuDevices
=
containerImage
.
VdcuFromEnv
(
image
.
EnvVarVDTKVisibleDevices
)
if
len
(
visibleDevices
.
List
())
>
0
&&
len
(
vdcuDevices
)
>
0
{
logger
.
Info
(
"No devices requested"
)
return
nil
,
nil
}
...
...
@@ -83,6 +86,11 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI
}
}
for
_
,
vdcuDevice
:=
range
vdcuDevices
{
if
!
slices
.
Contains
(
selectedBusIds
,
vdcuDevice
)
{
selectedBusIds
=
append
(
selectedBusIds
,
vdcuDevice
)
}
}
// In standard usage, the devRoot is the same as the driver.Root.
devRoot
:=
driver
.
Root
drmNodes
,
err
:=
discover
.
NewDRMNodesDiscoverer
(
...
...
@@ -125,7 +133,7 @@ func getDevicesFromDriver() ([]string, error) {
if
err
!=
nil
{
return
devices
,
fmt
.
Errorf
(
"failed to find devices bus id: %v"
,
err
)
}
if
len
(
matches
)
==
0
{
if
len
(
matches
)
==
0
{
m
,
err
:=
filepath
.
Glob
(
"/sys/module/hy*cu/drivers/pci:amdgpu/[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]:*"
)
if
err
!=
nil
{
return
devices
,
fmt
.
Errorf
(
"failed to find devices bus id: %v"
,
err
)
...
...
@@ -133,8 +141,6 @@ func getDevicesFromDriver() ([]string, error) {
matches
=
append
(
matches
,
m
...
)
}
for
_
,
path
:=
range
sort
.
StringSlice
(
matches
)
{
devices
=
append
(
devices
,
filepath
.
Base
(
path
))
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment