Commit 8a683270 authored by songlinfeng's avatar songlinfeng 💬
Browse files

add k8s-device-plugin

parent f5f6fd20
FROM docker.io/golang:1.25.3-alpine3.22
RUN apk --no-cache add git pkgconfig build-base libdrm-dev
RUN apk --no-cache add hwloc-dev --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community
RUN mkdir -p /go/src/github.com/DCU/k8s-device-plugin
ADD . /go/src/github.com/DCU/k8s-device-plugin
WORKDIR /go/src/github.com/DCU/k8s-device-plugin/cmd/k8s-device-plugin
RUN go install \
-ldflags="-X main.gitDescribe=$(git -C /go/src/github.com/DCU/k8s-device-plugin/ describe --always --long --dirty)"
FROM alpine:3.22
LABEL \
authors="songlinfeng" \
name="k8s-device-plugin" \
version="1.0.0" \
description="DCU device plugin for Kubernetes"
RUN apk --no-cache add ca-certificates libdrm
RUN apk --no-cache add hwloc --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community
COPY --from=0 /go/bin/k8s-device-plugin .
CMD ["./k8s-device-plugin", "-logtostderr=true", "-stderrthreshold=INFO", "-v=5"]
\ No newline at end of file
package main
import (
"flag"
"fmt"
"github.com/golang/glog"
"github.com/kubevirt/device-plugin-manager/pkg/dpm"
"k8s-device-plugin/internal/pkg/hwloc"
"k8s-device-plugin/internal/pkg/hydcu"
"k8s-device-plugin/internal/pkg/plugin"
"os"
"time"
)
var gitDescribe string
......@@ -26,13 +32,102 @@ func ParseStrategy(s string) (ResourceNamingStrategy, error) {
}
}
// return []string{"dcu"}
func getResourceList(resourceNamingStrategy ResourceNamingStrategy) ([]string, error) {
var resources []string
// Check if the node is homegeneous
isHomogeneous := hydcu.IsHomogeneous()
partitionCountMap := hydcu.UniquePartitionConfigCount(hydcu.GetHYDCUs())
if len(hydcu.GetHYDCUs()) == 0 {
return resources, nil
}
if isHomogeneous {
if resourceNamingStrategy == StrategySingle {
resources = []string{"dcu"}
} else if resourceNamingStrategy == StrategyMixed {
if len(partitionCountMap) == 0 {
// If partitioning is not supported on the node, we should report resources under "gpu" regardless of the strategy
resources = []string{"dcu"}
} else {
for partitionType, count := range partitionCountMap {
if count > 0 {
resources = append(resources, partitionType)
}
}
}
}
} else {
if resourceNamingStrategy == StrategySingle {
return resources, fmt.Errorf("Partitions of different styles across DCUs in a node is not supported with single strategy. Please start device plugin with mixed strategy")
} else if resourceNamingStrategy == StrategyMixed {
for partitionType, count := range partitionCountMap {
if count > 0 {
resources = append(resources, partitionType)
}
}
}
}
return resources, nil
}
func main() {
versions := [...]string{
"DCU device plugin for Kubernetes",
fmt.Sprintf("%s version %s", os.Args[0], gitDescribe),
fmt.Sprintf("%s", hwloc.GetVersions()),
}
flag.Usage = func() {
for _, v := range versions {
fmt.Println(v)
}
fmt.Fprintln(os.Stderr, "Usage:")
flag.PrintDefaults()
}
var pulse int
var resourceNamingStrategy string
flag.IntVar(&pulse, "pulse", 0, "time between health check polling in seconds. Set to 0 to disable health check")
flag.StringVar(&resourceNamingStrategy, "resource-naming-strategy", "single", "Resource strategy to be used: single or mixed")
flag.Parse()
strategy, err := ParseStrategy(resourceNamingStrategy)
if err != nil {
glog.Errorf("%v", err)
}
for _, v := range versions {
fmt.Println(v)
glog.Infof("%s", v)
}
l := plugin.DCULister{
ResUpdateChan: make(chan dpm.PluginNameList),
Heartbeat: make(chan bool),
}
manager := dpm.NewManager(&l)
if pulse > 0 {
go func() {
glog.Infof("Heart beating every %d seconds", pulse)
for {
time.Sleep(time.Second * time.Duration(pulse))
l.Heartbeat <- true
}
}()
}
go func() {
// /sys/class/kfd only exists if ROCm kernel/driver is installed
var path = "/sys/class/kfd"
if _, err := os.Stat(path); err == nil {
resources, err := getResourceList(strategy)
if err != nil {
glog.Errorf("Error occured: %v", err)
os.Exit(1)
}
if len(resources) > 0 {
l.ResUpdateChan <- resources
}
}
}()
manager.Run()
}
module k8s-device-plugin
go 1.25.3
require (
github.com/golang/glog v1.2.5
github.com/kubevirt/device-plugin-manager v1.19.5
)
require (
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/gogo/protobuf v1.3.1 // indirect
github.com/golang/protobuf v1.4.2 // indirect
golang.org/x/net v0.0.0-20200904194848-62affa334b73 // indirect
golang.org/x/sys v0.0.0-20200917073148-efd3b9a0ff20 // indirect
golang.org/x/text v0.3.3 // indirect
google.golang.org/genproto v0.0.0-20200916143405-f6a2fa72f0c4 // indirect
google.golang.org/grpc v1.32.0 // indirect
google.golang.org/protobuf v1.25.0 // indirect
k8s.io/kubelet v0.19.2 // indirect
)
This diff is collapsed.
package allocator
type Allocator struct {
available []*Device
allocated []*Device
policy Policy
}
type Policy interface {
Init(devs []*Device, topoDir string) error
Allocate(available, required []string, size int) ([]string, error)
}
package allocator
import (
"fmt"
"github.com/golang/glog"
"math"
)
const (
invalidSize = "allocation size can not be negative"
invalidAvailable = "available devices count less than allocation size"
invalidRequired = "must_include devices size is more than allocation size"
invalidReqAvailable = "must_include length should be less than or equal to avilable device size"
invalidInit = "Init method must be called before Allocate"
noCandidateFound = "No candidate subset found with matching criteria"
)
type BestEffortPolicy struct {
devices []*Device
devicesMap map[string]*Device
devicePartitions map[string]*DevicePartitions
p2pWeights map[int]map[int]int
}
func NewBestEffortPolicy() *BestEffortPolicy {
return &BestEffortPolicy{
devices: make([]*Device, 0),
devicesMap: make(map[string]*Device),
devicePartitions: make(map[string]*DevicePartitions),
p2pWeights: make(map[int]map[int]int),
}
}
func (b *BestEffortPolicy) getDevicesFromIds(ids []string) []*Device {
var res []*Device
for _, id := range ids {
res = append(res, b.devicesMap[id])
}
return res
}
func (b *BestEffortPolicy) Init(devs []*Device, topoDir string) error {
err := fetchAllPairWeights(devs, b.p2pWeights, topoDir)
if len(b.p2pWeights) == 0 {
return fmt.Errorf("Besteffort Policy init failed to initialize p2pWeights")
}
if err == nil {
b.devices = devs
for idx := range devs {
b.devicesMap[devs[idx].Id] = devs[idx]
}
b.devicePartitions = groupPartitionsByDevId(devs)
for _, par := range b.devicePartitions {
glog.Infof("Device: %s Partitions: %v", par.ParentId, par.Devs)
}
}
return err
}
func setContainsAll[K int | string](set, subset []K) bool {
if len(subset) > len(set) {
return false
}
for _, dev := range subset {
devFound := false
for i := range set {
if set[i] == dev {
devFound = true
break
}
}
if !devFound {
return false
}
}
return true
}
func (b *BestEffortPolicy) Allocate(availableIds, requiredIds []string, size int) ([]string, error) {
outset := []string{}
if size <= 0 {
return outset, fmt.Errorf(invalidSize)
}
if len(availableIds) < size {
return outset, fmt.Errorf(invalidAvailable)
}
if len(requiredIds) > size {
return outset, fmt.Errorf(invalidRequired)
}
if len(requiredIds) > len(availableIds) {
return outset, fmt.Errorf(invalidReqAvailable)
}
if len(b.devices) == 0 {
return outset, fmt.Errorf(invalidInit)
}
if len(availableIds) == size {
return availableIds, nil
}
if len(requiredIds) == size {
return requiredIds, nil
}
if len(b.p2pWeights) == 0 {
return outset, fmt.Errorf(invalidInit)
}
if !setContainsAll(availableIds, requiredIds) {
return outset, fmt.Errorf(noCandidateFound)
}
available := b.getDevicesFromIds(availableIds)
required := b.getDevicesFromIds(requiredIds)
allSubsets, err := getCandidateDeviceSubsets(b.devicePartitions, b.devices, available, required, size, b.p2pWeights)
if err != nil {
return outset, err
}
bestScore := math.MaxInt32
var candidate *DeviceSet
for _, subset := range allSubsets {
if subset.TotalWeight < bestScore {
candidate = subset
bestScore = subset.TotalWeight
}
}
for _, id := range candidate.Ids {
for _, d := range available {
if d.NodeId == id {
outset = append(outset, d.Id)
break
}
}
}
glog.Infof("best device subset:%v best score:%v", outset, candidate.TotalWeight)
return outset, nil
}
package allocator
import (
"bufio"
"fmt"
"github.com/golang/glog"
"os"
"path/filepath"
"regexp"
"slices"
"sort"
"strconv"
"strings"
)
const (
topoRootPath = "/sys/class/kfd/kfd/topology/nodes"
)
// below scores/weights are used to determine the closeness/efficiency of communication between GPU pairs
const (
// weight if GPUs/partitions belong to same GPU
sameDevIdWeight = 10
// weight if a pair is connected via XGMI link
xgmiLinkWeight = 10
// weight if GPU pair belongs to same numa node
sameNumaNodeWeight = 10
// weight if GPUs/partitions belong to different GPU.
// In case of full GPUs, the weight is 3
differentDevIdWeight = 20
// weight if GPU pair belongs to different numa node
differentNumaNodeWeight = 20
// weight if a pair is connected via PCIE link
pcieLinkWeight = 40
// weight if a pair is connected via any other link apart from XGMI or PCIE
otherLinkWeight = 50
)
type Device struct {
Id string
NodeId int
NumaNode int
DevId string
Card int
RenderD int
ComputePartitionType string
MemoryPartitionType string
}
type DevicePartitions struct {
ParentId string
DevId string
Ids []int
Devs []string
}
type DeviceSet struct {
Ids []int
TotalWeight int
LastIdx int
Size int
ParentIds []int
}
type DevicePartitionSet struct {
Ids []int
TotalWeight int
LastPartitionIdx int
}
func fetchTopoProperties(path string, re []*regexp.Regexp) ([]int, error) {
f, e := os.Open(path)
if e != nil {
glog.Errorf("Unable to open properties file. Error: %v", e)
return []int{0}, e
}
defer f.Close()
res := make([]int, len(re))
scanner := bufio.NewScanner(f)
for scanner.Scan() {
for idx := range re {
m := re[idx].FindStringSubmatch(scanner.Text())
if m == nil {
continue
}
v, err := strconv.ParseInt(m[1], 0, 32)
if err != nil {
glog.Errorf("Unable to parse properties file. Error: %v", err)
return nil, err
}
res[idx] = int(v)
}
}
return res, nil
}
func calculatePairWeight(from, to *Device, linkType int) int {
weight := 0
if from.DevId == to.DevId {
weight = weight + sameDevIdWeight
} else {
weight = weight + differentDevIdWeight
}
if linkType == 11 { // link type 11 is xgmi
weight = weight + xgmiLinkWeight
} else if linkType == 2 { //link type 2 is PCIE
weight = weight + pcieLinkWeight
} else { // other link types are given higher weight
weight = weight + otherLinkWeight
}
if from.NumaNode == to.NumaNode {
weight = weight + sameNumaNodeWeight
} else {
weight = weight + differentNumaNodeWeight
}
return weight
}
func scanAndPopulatePeerWeights(fromPath string, devices []*Device, lookupNodes map[int]struct{}, p2pWeights map[int]map[int]int) error {
paths, err1 := filepath.Glob(filepath.Join(fromPath, "io_links", "[0-9]*"))
p2pPaths, err2 := filepath.Glob(filepath.Join(fromPath, "p2p_links", "[0-9]*"))
if err1 != nil || err2 != nil {
glog.Errorf("unable to fetch io_links and p2p_links, Error1:%v Error2:%v", err1, err2)
return fmt.Errorf("Unable to fetch io_links and p2p_links")
}
if len(p2pPaths) > 0 {
paths = append(paths, p2pPaths...)
}
re := []*regexp.Regexp{
regexp.MustCompile(`node_from\s(\d+)`),
regexp.MustCompile(`node_to\s(\d+)`),
regexp.MustCompile(`type\s(\d+)`),
}
for _, topath := range paths {
propFile := filepath.Join(topath, "properties")
vals, err := fetchTopoProperties(propFile, re)
if err != nil {
continue
}
var from, to int
if vals[0] < vals[1] {
from = vals[0]
to = vals[1]
} else {
from = vals[1]
to = vals[0]
}
if _, ok := lookupNodes[from]; !ok {
continue
}
if _, ok := lookupNodes[to]; !ok {
continue
}
var fromDev, toDev *Device
devsFound := false
for idx := range devices {
if devices[idx].NodeId == from {
fromDev = devices[idx]
}
if devices[idx].NodeId == to {
toDev = devices[idx]
}
if fromDev != nil && toDev != nil {
devsFound = true
break
}
}
if devsFound {
if _, ok := p2pWeights[from]; !ok {
p2pWeights[from] = make(map[int]int)
}
p2pWeights[from][to] = calculatePairWeight(fromDev, toDev, int(vals[2]))
}
}
return nil
}
func fetchAllPairWeights(devices []*Device, p2pWeights map[int]map[int]int, folderPath string) error {
if len(devices) == 0 {
errMsg := fmt.Sprintf("Devices list is empty. Unable to calculate pair wise weights")
glog.Info(errMsg)
return fmt.Errorf(errMsg)
}
if folderPath == "" {
folderPath = topoRootPath
}
paths, err := filepath.Glob(filepath.Join(folderPath, "[0-9]*"))
if err != nil {
return fmt.Errorf("unable to find gpu nodes under topo directory")
}
nodeIds := make(map[int]struct{})
//nodeIds[4] = struct{}{}
for idx := range devices {
nodeIds[devices[idx].NodeId] = struct{}{}
}
drmRenderMinor := []*regexp.Regexp{regexp.MustCompile(`drm_render_minor\s(\d+)`)}
for _, path := range paths {
propFilePath := filepath.Join(path, "properties")
vals, err := fetchTopoProperties(propFilePath, drmRenderMinor)
//varl[0]={128}
if err != nil || vals[0] <= 0 {
continue
}
err = scanAndPopulatePeerWeights(path, devices, nodeIds, p2pWeights)
if err != nil {
return err
}
}
return nil
}
func groupPartitionsByDevId(devs []*Device) map[string]*DevicePartitions {
partitions := make(map[string]*DevicePartitions)
for _, dev := range devs {
if _, ok := partitions[dev.DevId]; !ok {
partitions[dev.DevId] = &DevicePartitions{
DevId: dev.DevId,
Ids: make([]int, 0),
Devs: make([]string, 0),
}
}
if !strings.Contains(dev.Id, "amdgpu_xcp") {
partitions[dev.DevId].ParentId = dev.Id
}
partitions[dev.DevId].Ids = append(partitions[dev.DevId].Ids, dev.NodeId)
partitions[dev.DevId].Devs = append(partitions[dev.DevId].Devs, dev.Id)
}
return partitions
}
func NewDeviceSet(nodeIds, parentIds []int, weight, lastIdx int) *DeviceSet {
return &DeviceSet{
Ids: nodeIds,
TotalWeight: weight,
LastIdx: lastIdx,
Size: len(nodeIds),
ParentIds: parentIds,
}
}
func addDeviceToSubsetAndUpdateWeight(subset *DeviceSet, devId, devIdx int, p2pWeights map[int]map[int]int) *DeviceSet {
currentWeight := subset.TotalWeight
var from, to int
ids := make([]int, 0)
for _, d := range subset.Ids {
if d < devId {
from = d
to = devId
} else {
from = devId
to = d
}
currentWeight = currentWeight + p2pWeights[from][to]
}
ids = append(ids, subset.Ids...)
ids = append(ids, devId)
newSubset := NewDeviceSet(ids, subset.ParentIds, currentWeight, devIdx)
return newSubset
}
func getCandidateDeviceSubsets(allDevPartitions map[string]*DevicePartitions, total, available, required []*Device, size int, p2pWeights map[int]map[int]int) ([]*DeviceSet, error) {
if size <= 0 {
return []*DeviceSet{}, fmt.Errorf("subset size should be positive integer")
}
if len(available) < size {
return []*DeviceSet{}, fmt.Errorf("subset size is more than available devices")
}
sort.Slice(available, func(i, j int) bool {
return available[i].NodeId < available[j].NodeId
})
devPartitions := filterPartitions(allDevPartitions, available, required)
newSize := size - len(required)
subsetsTemp := make([]*DeviceSet, 0)
subsetsFinal := make([]*DeviceSet, 0)
for idx, partition := range devPartitions {
ids := []int{partition.Ids[0]}
parentIds := []int{idx}
devset := NewDeviceSet(ids, parentIds, 0, idx)
if newSize == 1 {
for _, req := range required {
devset = addDeviceToSubsetAndUpdateWeight(devset, req.NodeId, idx, p2pWeights)
}
subsetsFinal = append(subsetsFinal, devset)
continue
}
sizeFulfilled := false
for i := 1; i < len(partition.Ids); i++ {
devset = addDeviceToSubsetAndUpdateWeight(devset, partition.Ids[i], idx, p2pWeights)
if i == newSize-1 {
sizeFulfilled = true
break
}
}
if sizeFulfilled {
for _, req := range required {
devset = addDeviceToSubsetAndUpdateWeight(devset, req.NodeId, idx, p2pWeights)
}
subsetsFinal = append(subsetsFinal, devset)
} else {
subsetsTemp = append(subsetsTemp, devset)
}
}
for {
if len(subsetsTemp) == 0 {
break
}
currentSubset := subsetsTemp[0]
subsetsTemp = subsetsTemp[1:]
if len(currentSubset.ParentIds) == len(devPartitions) {
continue
}
for idx := 0; idx < len(devPartitions); idx++ {
if slices.Contains(currentSubset.ParentIds, idx) {
continue
}
var parentIds []int
parentIds = append(parentIds, currentSubset.ParentIds...)
parentIds = append(parentIds, idx)
devset := NewDeviceSet(currentSubset.Ids, parentIds, currentSubset.TotalWeight, currentSubset.LastIdx)
for _, id := range devPartitions[idx].Ids {
devset = addDeviceToSubsetAndUpdateWeight(devset, id, idx, p2pWeights)
if devset.Size == newSize {
for _, req := range required {
devset = addDeviceToSubsetAndUpdateWeight(devset, req.NodeId, idx, p2pWeights)
}
subsetsFinal = append(subsetsFinal, devset)
break
}
}
if devset.Size < newSize {
subsetsTemp = append(subsetsTemp, devset)
}
}
}
return subsetsFinal, nil
}
func filterPartitions(partitions map[string]*DevicePartitions, available, required []*Device) []*DevicePartitions {
availableIdMap := make(map[int]struct{})
requiredIdMap := make(map[int]struct{})
outset := make([]*DevicePartitions, 0)
for _, av := range available {
availableIdMap[av.NodeId] = struct{}{}
}
for _, req := range required {
requiredIdMap[req.NodeId] = struct{}{}
}
for _, partitionSet := range partitions {
filteredIds := make([]int, 0)
for _, id := range partitionSet.Ids {
if _, ok := requiredIdMap[id]; ok {
continue
}
if _, ok := availableIdMap[id]; ok {
filteredIds = append(filteredIds, id)
}
}
if len(filteredIds) > 0 {
sort.Slice(filteredIds, func(i, j int) bool {
return filteredIds[i] < filteredIds[j]
})
filteredPartition := &DevicePartitions{
DevId: partitionSet.DevId,
Ids: filteredIds,
ParentId: partitionSet.ParentId,
}
outset = append(outset, filteredPartition)
}
}
sort.Slice(outset, func(i, j int) bool {
len1 := len(outset[i].Ids)
len2 := len(outset[j].Ids)
if len1 == len2 {
return outset[i].ParentId < outset[j].ParentId
}
return len1 < len2
})
return outset
}
package hydcu
import (
"bufio"
"errors"
"github.com/golang/glog"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
)
var topoDrmRenderMinorRe = regexp.MustCompile(`drm_render_minor\s(\d+)`)
var topoUniqueIdRe = regexp.MustCompile(`unique_id\s(\d+)`)
func GetDevIdsFromTopology(topoRootParam ...string) map[int]string {
topoRoot := "/sys/class/kfd/kfd"
if len(topoRootParam) == 1 {
topoRoot = topoRootParam[0]
}
renderDevIds := make(map[int]string)
var nodeFiles []string
var err error
if nodeFiles, err = filepath.Glob(topoRoot + "/topology/nodes/*/properties"); err != nil {
glog.Fatalf("glob error: %s", err)
return renderDevIds
}
for _, nodeFile := range nodeFiles {
glog.Info("Parsing " + nodeFile)
v, e := ParseTopologyProperties(nodeFile, topoDrmRenderMinorRe)
if e != nil {
glog.Error(e)
continue
}
if v <= 0 {
continue
}
devID, e := ParseTopologyPropertiesString(nodeFile, topoUniqueIdRe)
if e != nil {
glog.Error(e)
continue
}
renderDevIds[int(v)] = devID
}
/*
renderDevIds={128:"8324688932758364225",129:"8324688932758358177"}
*/
return renderDevIds
}
func ParseTopologyProperties(path string, re *regexp.Regexp) (int64, error) {
f, e := os.Open(path)
if e != nil {
return 0, e
}
defer f.Close()
e = errors.New("Topology property not found. Regex: " + re.String())
v := int64(0)
scanner := bufio.NewScanner(f)
for scanner.Scan() {
m := re.FindStringSubmatch(scanner.Text())
if m == nil {
continue
}
v, e = strconv.ParseInt(m[1], 0, 64)
break
}
return v, e
}
func ParseTopologyPropertiesString(path string, re *regexp.Regexp) (string, error) {
f, e := os.Open(path)
if e != nil {
return "", e
}
defer f.Close()
e = errors.New("Topology property not found. Regex: " + re.String())
v := ""
scanner := bufio.NewScanner(f)
for scanner.Scan() {
m := re.FindStringSubmatch(scanner.Text())
if m == nil {
continue
}
v = m[1]
e = nil
break
}
return v, e
}
func GetNodeIdsFromTopology(topoRootParam ...string) map[int]int {
topoRoot := "/sys/class/kfd/kfd"
if len(topoRootParam) == 1 {
topoRoot = topoRootParam[0]
}
renderNodeIds := make(map[int]int)
var nodeFiles []string
var err error
if nodeFiles, err = filepath.Glob(topoRoot + "/topology/nodes/*/properties"); err != nil {
glog.Fatalf("glob error: %s", err)
return renderNodeIds
}
for _, nodeFile := range nodeFiles {
glog.Info("Parsing " + nodeFile)
v, e := ParseTopologyProperties(nodeFile, topoDrmRenderMinorRe)
if e != nil {
glog.Error(e)
continue
}
if v <= 0 {
continue
}
nodeIndex := filepath.Base(filepath.Dir(nodeFile))
nodeId, err := strconv.Atoi(nodeIndex)
if err != nil {
glog.Errorf("Failed to convert node index %s to int: %v", nodeIndex, err)
continue
}
renderNodeIds[int(v)] = nodeId
}
/*
renderNodeIds={128:4,129:5}
*/
return renderNodeIds
}
func GetHYDCUs() map[string]map[string]interface{} {
matches, err := filepath.Glob("/sys/module/hy*cu/drivers/pci:hy*cu/[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]:*")
if err != nil {
glog.Warning("Failed to find hydcu driver directory: %s", err)
return make(map[string]map[string]interface{})
}
devID := ""
devices := make(map[string]map[string]interface{})
card, renderD, nodeId := 0, 128, 0
renderDevIds := GetDevIdsFromTopology()
renderNodeIds := GetNodeIdsFromTopology()
/*
renderDevIds={128:"8324688932758364225",129:"8324688932758358177"}
*/
/*
renderNodeIds={128:4,129:5}
*/
for _, path := range matches {
computePartitionFile := filepath.Join(path, "current_compute_partition")
memoryPartitionFile := filepath.Join(path, "current_memory_partition")
numaNodeFile := filepath.Join(path, "numa_node")
computePartitionType, memoryPartitionType := "", ""
numaNode := -1
if data, err := ioutil.ReadFile(computePartitionFile); err == nil {
computePartitionType = strings.ToLower(strings.TrimSpace(string(data)))
} else {
glog.Warningf("Failed to read 'current_compute_partition' file at %s: %s", computePartitionFile, err)
}
if data, err := ioutil.ReadFile(memoryPartitionFile); err == nil {
memoryPartitionType = strings.ToLower(strings.TrimSpace(string(data)))
} else {
glog.Warningf("Failed to read 'current_memory_partition' file at %s: %s", memoryPartitionFile, err)
}
if data, err := ioutil.ReadFile(numaNodeFile); err == nil {
numaNodeStr := strings.TrimSpace(string(data))
numaNode, err = strconv.Atoi(numaNodeStr)
if err != nil {
glog.Warningf("Failed to convert 'numa_node' value to int: %s", err)
continue
}
} else {
glog.Warningf("Failed to read 'numa_node' file at %s: %s", numaNodeFile, err)
continue
}
glog.Info(path)
devPaths, _ := filepath.Glob(path + "/drm/*")
for _, devPath := range devPaths {
switch name := filepath.Base(devPath); {
case name[0:4] == "card":
card, _ = strconv.Atoi(name[4:])
//card = 0
case name[0:7] == "renderD":
renderD, _ = strconv.Atoi(name[7:])
//renderD = 128
if val, exists := renderDevIds[renderD]; exists {
devID = val
//devID = 8324688932758364225
}
if id, exists := renderNodeIds[renderD]; exists {
nodeId = id
//nodeId = 2
}
}
devices[filepath.Base(path)] = map[string]interface{}{
"card": card,
"renderD": renderD,
"devID": devID,
"computePartitionType": computePartitionType,
"memoryPartitionType": memoryPartitionType,
"numaNode": numaNode,
"nodeId": nodeId,
}
}
}
platformMatches, _ := filepath.Glob("/sys/devices/platform/amdgpu_xcp_*")
for _, path := range platformMatches {
glog.Info(path)
devPaths, _ := filepath.Glob(path + "/drm/*")
computePartitionType, memoryPartitionType := "", ""
numaNode := -1
for _, devPath := range devPaths {
switch name := filepath.Base(devPath); {
case name[0:4] == "card":
card, _ = strconv.Atoi(name[4:])
case name[0:7] == "renderD":
renderD, _ = strconv.Atoi(name[7:])
if val, exists := renderDevIds[renderD]; exists {
devID = val
}
// Set the computePartitionType and memoryPartitionType from the real GPU or from other partitions using the common devID
for _, device := range devices {
if device["devID"] == devID {
if device["computePartitionType"].(string) != "" && device["memoryPartitionType"].(string) != "" {
computePartitionType = device["computePartitionType"].(string)
memoryPartitionType = device["memoryPartitionType"].(string)
numaNode = device["numaNode"].(int)
break
}
}
}
if id, exists := renderNodeIds[renderD]; exists {
nodeId = id
}
}
}
// This is needed because some of the visible renderD are actually not valid
// Their validity depends on topology information from KFD
if _, exists := renderDevIds[renderD]; !exists {
continue
}
if numaNode == -1 {
continue
}
devices[filepath.Base(path)] = map[string]interface{}{"card": card, "renderD": renderD, "devID": devID, "computePartitionType": computePartitionType, "memoryPartitionType": memoryPartitionType, "numaNode": numaNode, "nodeId": nodeId}
}
glog.Infof("Devices map: %v", devices)
return devices
}
func UniquePartitionConfigCount(devices map[string]map[string]interface{}) map[string]int {
partitionCountMap := make(map[string]int)
for _, device := range devices {
computePartitionType := device["computePartitionType"].(string)
memoryPartitionType := device["memoryPartitionType"].(string)
if computePartitionType != "" && memoryPartitionType != "" {
overallPartition := computePartitionType + "_" + memoryPartitionType
partitionCountMap[overallPartition]++
}
}
glog.Infof("Partition counts: %v", partitionCountMap)
return partitionCountMap
}
func IsHomogeneous() bool {
dcus := GetHYDCUs()
partitionCountMap := UniquePartitionConfigCount(dcus)
return len(partitionCountMap) <= 1
}
package plugin
import (
"bufio"
"fmt"
"github.com/golang/glog"
"github.com/kubevirt/device-plugin-manager/pkg/dpm"
"golang.org/x/net/context"
"k8s-device-plugin/internal/pkg/allocator"
"k8s-device-plugin/internal/pkg/hydcu"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"syscall"
)
type DCULister struct {
ResUpdateChan chan dpm.PluginNameList
Heartbeat chan bool
Signal chan os.Signal
}
type DCUPlugin struct {
DCUs map[string]map[string]interface{}
Heartbeat chan bool
signal chan os.Signal
Resource string
devAllocator allocator.Policy
allocatorInitError bool
}
type DCUPluginOption func(*DCUPlugin)
func (p *DCUPlugin) Start() error {
p.signal = make(chan os.Signal, 1)
signal.Notify(p.signal, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGTERM)
err := p.devAllocator.Init(getDevices(), "")
if err != nil {
glog.Errorf("allocator init failed. Falling back to kubelet default allocation. Error %v", err)
p.allocatorInitError = true
}
return nil
}
func getDevices() []*allocator.Device {
devices := hydcu.GetHYDCUs()
var deviceList []*allocator.Device
for id, deviceData := range devices {
device := &allocator.Device{
Id: id,
Card: deviceData["card"].(int),
RenderD: deviceData["renderD"].(int),
DevId: deviceData["devID"].(string),
ComputePartitionType: deviceData["computePartitionType"].(string),
MemoryPartitionType: deviceData["memoryPartitionType"].(string),
NodeId: deviceData["nodeId"].(int),
NumaNode: deviceData["numaNode"].(int),
}
deviceList = append(deviceList, device)
}
return deviceList
}
// Stop is an optional interface that could be implemented by plugin.
// If case Stop is implemented, it will be executed by Manager after the
// plugin is unregistered from kubelet. This method could be used to tear
// down resources.
func (p *DCUPlugin) Stop() error {
return nil
}
func NewDCUPlugin(options ...DCUPluginOption) *DCUPlugin {
dcuPlugin := &DCUPlugin{}
for _, option := range options {
option(dcuPlugin)
}
return dcuPlugin
}
func WithAllocator(a allocator.Policy) DCUPluginOption {
return func(p *DCUPlugin) {
p.devAllocator = a
}
}
func WithHeartbeat(ch chan bool) DCUPluginOption {
return func(p *DCUPlugin) {
p.Heartbeat = ch
}
}
func WithResource(res string) DCUPluginOption {
return func(p *DCUPlugin) {
p.Resource = res
}
}
func (l *DCULister) GetResourceNamespace() string {
return "c-3000.com"
}
func (l *DCULister) Discover(pluginListCh chan dpm.PluginNameList) {
for {
select {
case newResourcesList := <-l.ResUpdateChan:
pluginListCh <- newResourcesList
case <-pluginListCh: // Stop message received
// Stop resourceUpdateCh
return
}
}
}
func (p *DCUPlugin) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
if p.allocatorInitError {
return &pluginapi.DevicePluginOptions{}, nil
}
return &pluginapi.DevicePluginOptions{
GetPreferredAllocationAvailable: true,
}, nil
}
func (p *DCUPlugin) PreStartContainer(ctx context.Context, r *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
return &pluginapi.PreStartContainerResponse{}, nil
}
func simpleHealthCheck() bool {
entries, err := filepath.Glob("/sys/class/kfd/kfd/topology/nodes/*/properties")
if err != nil {
glog.Errorf("Error finding properties files: %v", err)
return false
}
for _, propFile := range entries {
f, err := os.Open(propFile)
if err != nil {
glog.Errorf("Error opening %s: %v", propFile, err)
continue
}
defer f.Close()
var cpuCores, gfxVersion int
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "cpu_cores_count") {
parts := strings.Fields(line)
if len(parts) == 2 {
cpuCores, _ = strconv.Atoi(parts[1])
}
} else if strings.HasPrefix(line, "gfx_target_version") {
parts := strings.Fields(line)
if len(parts) == 2 {
gfxVersion, _ = strconv.Atoi(parts[1])
}
}
}
if err := scanner.Err(); err != nil {
glog.Warningf("Error scanning %s: %v", propFile, err)
continue
}
if cpuCores == 0 && gfxVersion > 0 {
return true
}
}
glog.Warning("No GPU nodes found via properties")
return false
}
func (p *DCUPlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
p.DCUs = hydcu.GetHYDCUs()
glog.Infof("Found %d DCUs", len(p.DCUs))
devs := make([]*pluginapi.Device, len(p.DCUs))
var isHomogeneous bool
isHomogeneous = hydcu.IsHomogeneous()
// Initialize a map to store partitionType based device list
resourceTypeDevs := make(map[string][]*pluginapi.Device)
if isHomogeneous {
// limit scope for hwloc
func() {
i := 0
for id, device := range p.DCUs {
dev := &pluginapi.Device{
ID: id,
Health: pluginapi.Healthy,
}
devs[i] = dev
i++
numas := []int64{int64(device["numaNode"].(int))}
glog.Infof("Watching GPU with bus ID: %s NUMA Node: %+v", id, numas)
numaNodes := make([]*pluginapi.NUMANode, len(numas))
for j, v := range numas {
numaNodes[j] = &pluginapi.NUMANode{
ID: int64(v),
}
}
dev.Topology = &pluginapi.TopologyInfo{
Nodes: numaNodes,
}
}
}()
s.Send(&pluginapi.ListAndWatchResponse{Devices: devs})
} else {
func() {
for id, device := range p.DCUs {
dev := &pluginapi.Device{
ID: id,
Health: pluginapi.Healthy,
}
// Append a device belonging to a certain partition type to its respective list
partitionType := device["computePartitionType"].(string) + "_" + device["memoryPartitionType"].(string)
resourceTypeDevs[partitionType] = append(resourceTypeDevs[partitionType], dev)
numas := []int64{int64(device["numaNode"].(int))}
glog.Infof("Watching GPU with bus ID: %s NUMA Node: %+v", id, numas)
numaNodes := make([]*pluginapi.NUMANode, len(numas))
for j, v := range numas {
numaNodes[j] = &pluginapi.NUMANode{
ID: int64(v),
}
}
dev.Topology = &pluginapi.TopologyInfo{
Nodes: numaNodes,
}
}
}()
// Send the appropriate list of devices based on the partitionType
if devList, exists := resourceTypeDevs[p.Resource]; exists {
s.Send(&pluginapi.ListAndWatchResponse{Devices: devList})
}
}
loop:
for {
select {
case <-p.Heartbeat:
var health = pluginapi.Unhealthy
if simpleHealthCheck() {
health = pluginapi.Healthy
}
// update with per device GPU health status
if isHomogeneous {
//exporter.PopulatePerGPUDHealth(devs, health)
glog.Infof("Healthy: %v", health)
s.Send(&pluginapi.ListAndWatchResponse{Devices: devs})
} else {
if devList, exists := resourceTypeDevs[p.Resource]; exists {
//exporter.PopulatePerGPUDHealth(devList, health)
s.Send(&pluginapi.ListAndWatchResponse{Devices: devList})
}
}
case <-p.signal:
glog.Infof("Received signal, exiting")
break loop
}
}
// returning a value with this function will unregister the plugin from k8s
return nil
}
func (p *DCUPlugin) GetPreferredAllocation(ctx context.Context, req *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
response := &pluginapi.PreferredAllocationResponse{}
for _, req := range req.ContainerRequests {
allocated_ids, err := p.devAllocator.Allocate(req.AvailableDeviceIDs, req.MustIncludeDeviceIDs, int(req.AllocationSize))
if err != nil {
glog.Errorf("unable to get preferred allocation list. Error:%v", err)
return nil, fmt.Errorf("unable to get preferred allocation list. Error:%v", err)
}
resp := &pluginapi.ContainerPreferredAllocationResponse{
DeviceIDs: allocated_ids,
}
response.ContainerResponses = append(response.ContainerResponses, resp)
}
return response, nil
}
func (p *DCUPlugin) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
var response pluginapi.AllocateResponse
var car pluginapi.ContainerAllocateResponse
var dev *pluginapi.DeviceSpec
for _, req := range r.ContainerRequests {
car = pluginapi.ContainerAllocateResponse{}
// Currently, there are only 1 /dev/kfd per nodes regardless of the # of GPU available
// for compute/rocm/HSA use cases
dev = new(pluginapi.DeviceSpec)
dev.HostPath = "/dev/kfd"
dev.ContainerPath = "/dev/kfd"
dev.Permissions = "rw"
car.Devices = append(car.Devices, dev)
for _, id := range req.DevicesIDs {
glog.Infof("Allocating device ID: %s", id)
for k, v := range p.DCUs[id] {
// Map struct previously only had 'card' and 'renderD' and only those are paths to be appended as before
if k != "card" && k != "renderD" {
continue
}
devpath := fmt.Sprintf("/dev/dri/%s%d", k, v)
dev = new(pluginapi.DeviceSpec)
dev.HostPath = devpath
dev.ContainerPath = devpath
dev.Permissions = "rw"
car.Devices = append(car.Devices, dev)
}
}
response.ContainerResponses = append(response.ContainerResponses, &car)
}
return &response, nil
}
func (l *DCULister) NewPlugin(resourceLastName string) dpm.PluginInterface {
options := []DCUPluginOption{
WithHeartbeat(l.Heartbeat),
WithResource(resourceLastName),
WithAllocator(allocator.NewBestEffortPolicy()),
}
return NewDCUPlugin(options...)
}
root = true
[*.go]
indent_style = tab
indent_size = 4
insert_final_newline = true
[*.{yml,yaml}]
indent_style = space
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
# Setup a Global .gitignore for OS and editor generated files:
# https://help.github.com/articles/ignoring-files
# git config --global core.excludesfile ~/.gitignore_global
.vagrant
*.sublime-project
sudo: false
language: go
go:
- "stable"
- "1.11.x"
- "1.10.x"
- "1.9.x"
matrix:
include:
- go: "stable"
env: GOLINT=true
allow_failures:
- go: tip
fast_finish: true
before_install:
- if [ ! -z "${GOLINT}" ]; then go get -u golang.org/x/lint/golint; fi
script:
- go test --race ./...
after_script:
- test -z "$(gofmt -s -l -w . | tee /dev/stderr)"
- if [ ! -z "${GOLINT}" ]; then echo running golint; golint --set_exit_status ./...; else echo skipping golint; fi
- go vet ./...
os:
- linux
- osx
- windows
notifications:
email: false
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# You can update this list using the following command:
#
# $ git shortlog -se | awk '{print $2 " " $3 " " $4}'
# Please keep the list sorted.
Aaron L <aaron@bettercoder.net>
Adrien Bustany <adrien@bustany.org>
Amit Krishnan <amit.krishnan@oracle.com>
Anmol Sethi <me@anmol.io>
Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Bruno Bigras <bigras.bruno@gmail.com>
Caleb Spare <cespare@gmail.com>
Case Nelson <case@teammating.com>
Chris Howey <chris@howey.me> <howeyc@gmail.com>
Christoffer Buchholz <christoffer.buchholz@gmail.com>
Daniel Wagner-Hall <dawagner@gmail.com>
Dave Cheney <dave@cheney.net>
Evan Phoenix <evan@fallingsnow.net>
Francisco Souza <f@souza.cc>
Hari haran <hariharan.uno@gmail.com>
John C Barstow
Kelvin Fo <vmirage@gmail.com>
Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
Matt Layher <mdlayher@gmail.com>
Nathan Youngman <git@nathany.com>
Nickolai Zeldovich <nickolai@csail.mit.edu>
Patrick <patrick@dropbox.com>
Paul Hammond <paul@paulhammond.org>
Pawel Knap <pawelknap88@gmail.com>
Pieter Droogendijk <pieter@binky.org.uk>
Pursuit92 <JoshChase@techpursuit.net>
Riku Voipio <riku.voipio@linaro.org>
Rob Figueiredo <robfig@gmail.com>
Rodrigo Chiossi <rodrigochiossi@gmail.com>
Slawek Ligus <root@ooz.ie>
Soge Zhang <zhssoge@gmail.com>
Tiffany Jernigan <tiffany.jernigan@intel.com>
Tilak Sharma <tilaks@google.com>
Tom Payne <twpayne@gmail.com>
Travis Cline <travis.cline@gmail.com>
Tudor Golubenco <tudor.g@gmail.com>
Vahe Khachikyan <vahe@live.ca>
Yukang <moorekang@gmail.com>
bronze1man <bronze1man@gmail.com>
debrando <denis.brandolini@gmail.com>
henrikedwards <henrik.edwards@gmail.com>
铁哥 <guotie.9@gmail.com>
# Changelog
## v1.4.7 / 2018-01-09
* BSD/macOS: Fix possible deadlock on closing the watcher on kqueue (thanks @nhooyr and @glycerine)
* Tests: Fix missing verb on format string (thanks @rchiossi)
* Linux: Fix deadlock in Remove (thanks @aarondl)
* Linux: Watch.Add improvements (avoid race, fix consistency, reduce garbage) (thanks @twpayne)
* Docs: Moved FAQ into the README (thanks @vahe)
* Linux: Properly handle inotify's IN_Q_OVERFLOW event (thanks @zeldovich)
* Docs: replace references to OS X with macOS
## v1.4.2 / 2016-10-10
* Linux: use InotifyInit1 with IN_CLOEXEC to stop leaking a file descriptor to a child process when using fork/exec [#178](https://github.com/fsnotify/fsnotify/pull/178) (thanks @pattyshack)
## v1.4.1 / 2016-10-04
* Fix flaky inotify stress test on Linux [#177](https://github.com/fsnotify/fsnotify/pull/177) (thanks @pattyshack)
## v1.4.0 / 2016-10-01
* add a String() method to Event.Op [#165](https://github.com/fsnotify/fsnotify/pull/165) (thanks @oozie)
## v1.3.1 / 2016-06-28
* Windows: fix for double backslash when watching the root of a drive [#151](https://github.com/fsnotify/fsnotify/issues/151) (thanks @brunoqc)
## v1.3.0 / 2016-04-19
* Support linux/arm64 by [patching](https://go-review.googlesource.com/#/c/21971/) x/sys/unix and switching to to it from syscall (thanks @suihkulokki) [#135](https://github.com/fsnotify/fsnotify/pull/135)
## v1.2.10 / 2016-03-02
* Fix golint errors in windows.go [#121](https://github.com/fsnotify/fsnotify/pull/121) (thanks @tiffanyfj)
## v1.2.9 / 2016-01-13
kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsnotify/pull/111) (thanks @bep)
## v1.2.8 / 2015-12-17
* kqueue: fix race condition in Close [#105](https://github.com/fsnotify/fsnotify/pull/105) (thanks @djui for reporting the issue and @ppknap for writing a failing test)
* inotify: fix race in test
* enable race detection for continuous integration (Linux, Mac, Windows)
## v1.2.5 / 2015-10-17
* inotify: use epoll_create1 for arm64 support (requires Linux 2.6.27 or later) [#100](https://github.com/fsnotify/fsnotify/pull/100) (thanks @suihkulokki)
* inotify: fix path leaks [#73](https://github.com/fsnotify/fsnotify/pull/73) (thanks @chamaken)
* kqueue: watch for rename events on subdirectories [#83](https://github.com/fsnotify/fsnotify/pull/83) (thanks @guotie)
* kqueue: avoid infinite loops from symlinks cycles [#101](https://github.com/fsnotify/fsnotify/pull/101) (thanks @illicitonion)
## v1.2.1 / 2015-10-14
* kqueue: don't watch named pipes [#98](https://github.com/fsnotify/fsnotify/pull/98) (thanks @evanphx)
## v1.2.0 / 2015-02-08
* inotify: use epoll to wake up readEvents [#66](https://github.com/fsnotify/fsnotify/pull/66) (thanks @PieterD)
* inotify: closing watcher should now always shut down goroutine [#63](https://github.com/fsnotify/fsnotify/pull/63) (thanks @PieterD)
* kqueue: close kqueue after removing watches, fixes [#59](https://github.com/fsnotify/fsnotify/issues/59)
## v1.1.1 / 2015-02-05
* inotify: Retry read on EINTR [#61](https://github.com/fsnotify/fsnotify/issues/61) (thanks @PieterD)
## v1.1.0 / 2014-12-12
* kqueue: rework internals [#43](https://github.com/fsnotify/fsnotify/pull/43)
* add low-level functions
* only need to store flags on directories
* less mutexes [#13](https://github.com/fsnotify/fsnotify/issues/13)
* done can be an unbuffered channel
* remove calls to os.NewSyscallError
* More efficient string concatenation for Event.String() [#52](https://github.com/fsnotify/fsnotify/pull/52) (thanks @mdlayher)
* kqueue: fix regression in rework causing subdirectories to be watched [#48](https://github.com/fsnotify/fsnotify/issues/48)
* kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51)
## v1.0.4 / 2014-09-07
* kqueue: add dragonfly to the build tags.
* Rename source code files, rearrange code so exported APIs are at the top.
* Add done channel to example code. [#37](https://github.com/fsnotify/fsnotify/pull/37) (thanks @chenyukang)
## v1.0.3 / 2014-08-19
* [Fix] Windows MOVED_TO now translates to Create like on BSD and Linux. [#36](https://github.com/fsnotify/fsnotify/issues/36)
## v1.0.2 / 2014-08-17
* [Fix] Missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso)
* [Fix] Make ./path and path equivalent. (thanks @zhsso)
## v1.0.0 / 2014-08-15
* [API] Remove AddWatch on Windows, use Add.
* Improve documentation for exported identifiers. [#30](https://github.com/fsnotify/fsnotify/issues/30)
* Minor updates based on feedback from golint.
## dev / 2014-07-09
* Moved to [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify).
* Use os.NewSyscallError instead of returning errno (thanks @hariharan-uno)
## dev / 2014-07-04
* kqueue: fix incorrect mutex used in Close()
* Update example to demonstrate usage of Op.
## dev / 2014-06-28
* [API] Don't set the Write Op for attribute notifications [#4](https://github.com/fsnotify/fsnotify/issues/4)
* Fix for String() method on Event (thanks Alex Brainman)
* Don't build on Plan 9 or Solaris (thanks @4ad)
## dev / 2014-06-21
* Events channel of type Event rather than *Event.
* [internal] use syscall constants directly for inotify and kqueue.
* [internal] kqueue: rename events to kevents and fileEvent to event.
## dev / 2014-06-19
* Go 1.3+ required on Windows (uses syscall.ERROR_MORE_DATA internally).
* [internal] remove cookie from Event struct (unused).
* [internal] Event struct has the same definition across every OS.
* [internal] remove internal watch and removeWatch methods.
## dev / 2014-06-12
* [API] Renamed Watch() to Add() and RemoveWatch() to Remove().
* [API] Pluralized channel names: Events and Errors.
* [API] Renamed FileEvent struct to Event.
* [API] Op constants replace methods like IsCreate().
## dev / 2014-06-12
* Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98)
## dev / 2014-05-23
* [API] Remove current implementation of WatchFlags.
* current implementation doesn't take advantage of OS for efficiency
* provides little benefit over filtering events as they are received, but has extra bookkeeping and mutexes
* no tests for the current implementation
* not fully implemented on Windows [#93](https://github.com/howeyc/fsnotify/issues/93#issuecomment-39285195)
## v0.9.3 / 2014-12-31
* kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51)
## v0.9.2 / 2014-08-17
* [Backport] Fix missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso)
## v0.9.1 / 2014-06-12
* Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98)
## v0.9.0 / 2014-01-17
* IsAttrib() for events that only concern a file's metadata [#79][] (thanks @abustany)
* [Fix] kqueue: fix deadlock [#77][] (thanks @cespare)
* [NOTICE] Development has moved to `code.google.com/p/go.exp/fsnotify` in preparation for inclusion in the Go standard library.
## v0.8.12 / 2013-11-13
* [API] Remove FD_SET and friends from Linux adapter
## v0.8.11 / 2013-11-02
* [Doc] Add Changelog [#72][] (thanks @nathany)
* [Doc] Spotlight and double modify events on macOS [#62][] (reported by @paulhammond)
## v0.8.10 / 2013-10-19
* [Fix] kqueue: remove file watches when parent directory is removed [#71][] (reported by @mdwhatcott)
* [Fix] kqueue: race between Close and readEvents [#70][] (reported by @bernerdschaefer)
* [Doc] specify OS-specific limits in README (thanks @debrando)
## v0.8.9 / 2013-09-08
* [Doc] Contributing (thanks @nathany)
* [Doc] update package path in example code [#63][] (thanks @paulhammond)
* [Doc] GoCI badge in README (Linux only) [#60][]
* [Doc] Cross-platform testing with Vagrant [#59][] (thanks @nathany)
## v0.8.8 / 2013-06-17
* [Fix] Windows: handle `ERROR_MORE_DATA` on Windows [#49][] (thanks @jbowtie)
## v0.8.7 / 2013-06-03
* [API] Make syscall flags internal
* [Fix] inotify: ignore event changes
* [Fix] race in symlink test [#45][] (reported by @srid)
* [Fix] tests on Windows
* lower case error messages
## v0.8.6 / 2013-05-23
* kqueue: Use EVT_ONLY flag on Darwin
* [Doc] Update README with full example
## v0.8.5 / 2013-05-09
* [Fix] inotify: allow monitoring of "broken" symlinks (thanks @tsg)
## v0.8.4 / 2013-04-07
* [Fix] kqueue: watch all file events [#40][] (thanks @ChrisBuchholz)
## v0.8.3 / 2013-03-13
* [Fix] inoitfy/kqueue memory leak [#36][] (reported by @nbkolchin)
* [Fix] kqueue: use fsnFlags for watching a directory [#33][] (reported by @nbkolchin)
## v0.8.2 / 2013-02-07
* [Doc] add Authors
* [Fix] fix data races for map access [#29][] (thanks @fsouza)
## v0.8.1 / 2013-01-09
* [Fix] Windows path separators
* [Doc] BSD License
## v0.8.0 / 2012-11-09
* kqueue: directory watching improvements (thanks @vmirage)
* inotify: add `IN_MOVED_TO` [#25][] (requested by @cpisto)
* [Fix] kqueue: deleting watched directory [#24][] (reported by @jakerr)
## v0.7.4 / 2012-10-09
* [Fix] inotify: fixes from https://codereview.appspot.com/5418045/ (ugorji)
* [Fix] kqueue: preserve watch flags when watching for delete [#21][] (reported by @robfig)
* [Fix] kqueue: watch the directory even if it isn't a new watch (thanks @robfig)
* [Fix] kqueue: modify after recreation of file
## v0.7.3 / 2012-09-27
* [Fix] kqueue: watch with an existing folder inside the watched folder (thanks @vmirage)
* [Fix] kqueue: no longer get duplicate CREATE events
## v0.7.2 / 2012-09-01
* kqueue: events for created directories
## v0.7.1 / 2012-07-14
* [Fix] for renaming files
## v0.7.0 / 2012-07-02
* [Feature] FSNotify flags
* [Fix] inotify: Added file name back to event path
## v0.6.0 / 2012-06-06
* kqueue: watch files after directory created (thanks @tmc)
## v0.5.1 / 2012-05-22
* [Fix] inotify: remove all watches before Close()
## v0.5.0 / 2012-05-03
* [API] kqueue: return errors during watch instead of sending over channel
* kqueue: match symlink behavior on Linux
* inotify: add `DELETE_SELF` (requested by @taralx)
* [Fix] kqueue: handle EINTR (reported by @robfig)
* [Doc] Godoc example [#1][] (thanks @davecheney)
## v0.4.0 / 2012-03-30
* Go 1 released: build with go tool
* [Feature] Windows support using winfsnotify
* Windows does not have attribute change notifications
* Roll attribute notifications into IsModify
## v0.3.0 / 2012-02-19
* kqueue: add files when watch directory
## v0.2.0 / 2011-12-30
* update to latest Go weekly code
## v0.1.0 / 2011-10-19
* kqueue: add watch on file creation to match inotify
* kqueue: create file event
* inotify: ignore `IN_IGNORED` events
* event String()
* linux: common FileEvent functions
* initial commit
[#79]: https://github.com/howeyc/fsnotify/pull/79
[#77]: https://github.com/howeyc/fsnotify/pull/77
[#72]: https://github.com/howeyc/fsnotify/issues/72
[#71]: https://github.com/howeyc/fsnotify/issues/71
[#70]: https://github.com/howeyc/fsnotify/issues/70
[#63]: https://github.com/howeyc/fsnotify/issues/63
[#62]: https://github.com/howeyc/fsnotify/issues/62
[#60]: https://github.com/howeyc/fsnotify/issues/60
[#59]: https://github.com/howeyc/fsnotify/issues/59
[#49]: https://github.com/howeyc/fsnotify/issues/49
[#45]: https://github.com/howeyc/fsnotify/issues/45
[#40]: https://github.com/howeyc/fsnotify/issues/40
[#36]: https://github.com/howeyc/fsnotify/issues/36
[#33]: https://github.com/howeyc/fsnotify/issues/33
[#29]: https://github.com/howeyc/fsnotify/issues/29
[#25]: https://github.com/howeyc/fsnotify/issues/25
[#24]: https://github.com/howeyc/fsnotify/issues/24
[#21]: https://github.com/howeyc/fsnotify/issues/21
# Contributing
## Issues
* Request features and report bugs using the [GitHub Issue Tracker](https://github.com/fsnotify/fsnotify/issues).
* Please indicate the platform you are using fsnotify on.
* A code example to reproduce the problem is appreciated.
## Pull Requests
### Contributor License Agreement
fsnotify is derived from code in the [golang.org/x/exp](https://godoc.org/golang.org/x/exp) package and it may be included [in the standard library](https://github.com/fsnotify/fsnotify/issues/1) in the future. Therefore fsnotify carries the same [LICENSE](https://github.com/fsnotify/fsnotify/blob/master/LICENSE) as Go. Contributors retain their copyright, so you need to fill out a short form before we can accept your contribution: [Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual).
Please indicate that you have signed the CLA in your pull request.
### How fsnotify is Developed
* Development is done on feature branches.
* Tests are run on BSD, Linux, macOS and Windows.
* Pull requests are reviewed and [applied to master][am] using [hub][].
* Maintainers may modify or squash commits rather than asking contributors to.
* To issue a new release, the maintainers will:
* Update the CHANGELOG
* Tag a version, which will become available through gopkg.in.
### How to Fork
For smooth sailing, always use the original import path. Installing with `go get` makes this easy.
1. Install from GitHub (`go get -u github.com/fsnotify/fsnotify`)
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Ensure everything works and the tests pass (see below)
4. Commit your changes (`git commit -am 'Add some feature'`)
Contribute upstream:
1. Fork fsnotify on GitHub
2. Add your remote (`git remote add fork git@github.com:mycompany/repo.git`)
3. Push to the branch (`git push fork my-new-feature`)
4. Create a new Pull Request on GitHub
This workflow is [thoroughly explained by Katrina Owen](https://splice.com/blog/contributing-open-source-git-repositories-go/).
### Testing
fsnotify uses build tags to compile different code on Linux, BSD, macOS, and Windows.
Before doing a pull request, please do your best to test your changes on multiple platforms, and list which platforms you were able/unable to test on.
To aid in cross-platform testing there is a Vagrantfile for Linux and BSD.
* Install [Vagrant](http://www.vagrantup.com/) and [VirtualBox](https://www.virtualbox.org/)
* Setup [Vagrant Gopher](https://github.com/nathany/vagrant-gopher) in your `src` folder.
* Run `vagrant up` from the project folder. You can also setup just one box with `vagrant up linux` or `vagrant up bsd` (note: the BSD box doesn't support Windows hosts at this time, and NFS may prompt for your host OS password)
* Once setup, you can run the test suite on a given OS with a single command `vagrant ssh linux -c 'cd fsnotify/fsnotify; go test'`.
* When you're done, you will want to halt or destroy the Vagrant boxes.
Notice: fsnotify file system events won't trigger in shared folders. The tests get around this limitation by using the /tmp directory.
Right now there is no equivalent solution for Windows and macOS, but there are Windows VMs [freely available from Microsoft](http://www.modern.ie/en-us/virtualization-tools#downloads).
### Maintainers
Help maintaining fsnotify is welcome. To be a maintainer:
* Submit a pull request and sign the CLA as above.
* You must be able to run the test suite on Mac, Windows, Linux and BSD.
To keep master clean, the fsnotify project uses the "apply mail" workflow outlined in Nathaniel Talbott's post ["Merge pull request" Considered Harmful][am]. This requires installing [hub][].
All code changes should be internal pull requests.
Releases are tagged using [Semantic Versioning](http://semver.org/).
[hub]: https://github.com/github/hub
[am]: http://blog.spreedly.com/2014/06/24/merge-pull-request-considered-harmful/#.VGa5yZPF_Zs
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2012-2019 fsnotify Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# File system notifications for Go
[![GoDoc](https://godoc.org/github.com/fsnotify/fsnotify?status.svg)](https://godoc.org/github.com/fsnotify/fsnotify) [![Go Report Card](https://goreportcard.com/badge/github.com/fsnotify/fsnotify)](https://goreportcard.com/report/github.com/fsnotify/fsnotify)
fsnotify utilizes [golang.org/x/sys](https://godoc.org/golang.org/x/sys) rather than `syscall` from the standard library. Ensure you have the latest version installed by running:
```console
go get -u golang.org/x/sys/...
```
Cross platform: Windows, Linux, BSD and macOS.
| Adapter | OS | Status |
| --------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
| inotify | Linux 2.6.27 or later, Android\* | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) |
| kqueue | BSD, macOS, iOS\* | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) |
| ReadDirectoryChangesW | Windows | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) |
| FSEvents | macOS | [Planned](https://github.com/fsnotify/fsnotify/issues/11) |
| FEN | Solaris 11 | [In Progress](https://github.com/fsnotify/fsnotify/issues/12) |
| fanotify | Linux 2.6.37+ | [Planned](https://github.com/fsnotify/fsnotify/issues/114) |
| USN Journals | Windows | [Maybe](https://github.com/fsnotify/fsnotify/issues/53) |
| Polling | *All* | [Maybe](https://github.com/fsnotify/fsnotify/issues/9) |
\* Android and iOS are untested.
Please see [the documentation](https://godoc.org/github.com/fsnotify/fsnotify) and consult the [FAQ](#faq) for usage information.
## API stability
fsnotify is a fork of [howeyc/fsnotify](https://godoc.org/github.com/howeyc/fsnotify) with a new API as of v1.0. The API is based on [this design document](http://goo.gl/MrYxyA).
All [releases](https://github.com/fsnotify/fsnotify/releases) are tagged based on [Semantic Versioning](http://semver.org/). Further API changes are [planned](https://github.com/fsnotify/fsnotify/milestones), and will be tagged with a new major revision number.
Go 1.6 supports dependencies located in the `vendor/` folder. Unless you are creating a library, it is recommended that you copy fsnotify into `vendor/github.com/fsnotify/fsnotify` within your project, and likewise for `golang.org/x/sys`.
## Usage
```go
package main
import (
"log"
"github.com/fsnotify/fsnotify"
)
func main() {
watcher, err := fsnotify.NewWatcher()
if err != nil {
log.Fatal(err)
}
defer watcher.Close()
done := make(chan bool)
go func() {
for {
select {
case event, ok := <-watcher.Events:
if !ok {
return
}
log.Println("event:", event)
if event.Op&fsnotify.Write == fsnotify.Write {
log.Println("modified file:", event.Name)
}
case err, ok := <-watcher.Errors:
if !ok {
return
}
log.Println("error:", err)
}
}
}()
err = watcher.Add("/tmp/foo")
if err != nil {
log.Fatal(err)
}
<-done
}
```
## Contributing
Please refer to [CONTRIBUTING][] before opening an issue or pull request.
## Example
See [example_test.go](https://github.com/fsnotify/fsnotify/blob/master/example_test.go).
## FAQ
**When a file is moved to another directory is it still being watched?**
No (it shouldn't be, unless you are watching where it was moved to).
**When I watch a directory, are all subdirectories watched as well?**
No, you must add watches for any directory you want to watch (a recursive watcher is on the roadmap [#18][]).
**Do I have to watch the Error and Event channels in a separate goroutine?**
As of now, yes. Looking into making this single-thread friendly (see [howeyc #7][#7])
**Why am I receiving multiple events for the same file on OS X?**
Spotlight indexing on OS X can result in multiple events (see [howeyc #62][#62]). A temporary workaround is to add your folder(s) to the *Spotlight Privacy settings* until we have a native FSEvents implementation (see [#11][]).
**How many files can be watched at once?**
There are OS-specific limits as to how many watches can be created:
* Linux: /proc/sys/fs/inotify/max_user_watches contains the limit, reaching this limit results in a "no space left on device" error.
* BSD / OSX: sysctl variables "kern.maxfiles" and "kern.maxfilesperproc", reaching these limits results in a "too many open files" error.
**Why don't notifications work with NFS filesystems or filesystem in userspace (FUSE)?**
fsnotify requires support from underlying OS to work. The current NFS protocol does not provide network level support for file notifications.
[#62]: https://github.com/howeyc/fsnotify/issues/62
[#18]: https://github.com/fsnotify/fsnotify/issues/18
[#11]: https://github.com/fsnotify/fsnotify/issues/11
[#7]: https://github.com/howeyc/fsnotify/issues/7
[contributing]: https://github.com/fsnotify/fsnotify/blob/master/CONTRIBUTING.md
## Related Projects
* [notify](https://github.com/rjeczalik/notify)
* [fsevents](https://github.com/fsnotify/fsevents)
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build solaris
package fsnotify
import (
"errors"
)
// Watcher watches a set of files, delivering events to a channel.
type Watcher struct {
Events chan Event
Errors chan error
}
// NewWatcher establishes a new watcher with the underlying OS and begins waiting for events.
func NewWatcher() (*Watcher, error) {
return nil, errors.New("FEN based watcher not yet supported for fsnotify\n")
}
// Close removes all watches and closes the events channel.
func (w *Watcher) Close() error {
return nil
}
// Add starts watching the named file or directory (non-recursively).
func (w *Watcher) Add(name string) error {
return nil
}
// Remove stops watching the the named file or directory (non-recursively).
func (w *Watcher) Remove(name string) error {
return nil
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !plan9
// Package fsnotify provides a platform-independent interface for file system notifications.
package fsnotify
import (
"bytes"
"errors"
"fmt"
)
// Event represents a single file system notification.
type Event struct {
Name string // Relative path to the file or directory.
Op Op // File operation that triggered the event.
}
// Op describes a set of file operations.
type Op uint32
// These are the generalized file operations that can trigger a notification.
const (
Create Op = 1 << iota
Write
Remove
Rename
Chmod
)
func (op Op) String() string {
// Use a buffer for efficient string concatenation
var buffer bytes.Buffer
if op&Create == Create {
buffer.WriteString("|CREATE")
}
if op&Remove == Remove {
buffer.WriteString("|REMOVE")
}
if op&Write == Write {
buffer.WriteString("|WRITE")
}
if op&Rename == Rename {
buffer.WriteString("|RENAME")
}
if op&Chmod == Chmod {
buffer.WriteString("|CHMOD")
}
if buffer.Len() == 0 {
return ""
}
return buffer.String()[1:] // Strip leading pipe
}
// String returns a string representation of the event in the form
// "file: REMOVE|WRITE|..."
func (e Event) String() string {
return fmt.Sprintf("%q: %s", e.Name, e.Op.String())
}
// Common errors that can be reported by a watcher
var (
ErrEventOverflow = errors.New("fsnotify queue overflow")
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment