accelerator_rocm.go 2.15 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
//go:build rocm

package llm

import (
	"bytes"
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"log"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"strconv"
	"strings"
)

var errNoAccel = errors.New("rocm-smi command failed")

// acceleratedRunner builds the list of runners available for the ROCm
// accelerator. It contains a single entry pointing at the
// "rocm/bin/ollama-runner" binary underneath buildPath, flagged as accelerated.
func acceleratedRunner(buildPath string) []ModelRunner {
	rocmRunner := ModelRunner{
		Path:        path.Join(buildPath, "rocm", "bin", "ollama-runner"),
		Accelerated: true,
	}
	return []ModelRunner{rocmRunner}
}

// CheckVRAM returns the free VRAM of the AMD GPU that currently has the most
// free memory, as reported by `rocm-smi --showmeminfo VRAM --csv`.
//
// The result is the raw "total - used" figure from rocm-smi, which this code
// treats as bytes (it is divided by 1024*1024 only when logged as MiB).
//
// rocm-smi is looked up under $ROCM_PATH, then $ROCM_HOME, and finally under
// the fallback /opt/rocm.
//
// It returns errNoAccel when rocm-smi cannot be run or when no card with free
// VRAM is found, and a descriptive error when the rocm-smi output cannot be
// parsed.
func CheckVRAM() (int64, error) {
	rocmHome := os.Getenv("ROCM_PATH")
	if rocmHome == "" {
		rocmHome = os.Getenv("ROCM_HOME")
	}
	if rocmHome == "" {
		log.Println("warning: ROCM_PATH is not set. Trying a likely fallback path, but it is recommended to set this variable in the environment.")
		rocmHome = "/opt/rocm"
	}

	cmd := exec.Command(filepath.Join(rocmHome, "bin/rocm-smi"), "--showmeminfo", "VRAM", "--csv")
	var stdout bytes.Buffer
	cmd.Stdout = &stdout
	if err := cmd.Run(); err != nil {
		// The sentinel is returned unchanged so callers can keep comparing
		// against it to detect "no usable accelerator".
		return 0, errNoAccel
	}

	free, card, err := parseROCmVRAM(&stdout)
	if err != nil {
		return 0, err
	}
	if free == 0 {
		log.Printf("found ROCm GPU but failed to parse free VRAM!")
		return 0, errNoAccel
	}
	log.Printf("ROCm selecting device %q", card)
	return free, nil
}

// parseROCmVRAM reads the CSV output of `rocm-smi --showmeminfo VRAM --csv`
// from r and returns the largest per-card free VRAM (total minus used) together
// with the name of that card. Rows whose first column does not start with
// "card" (such as the header) are skipped. It returns (0, "", nil) when no card
// row reports any free memory.
func parseROCmVRAM(r io.Reader) (int64, string, error) {
	rows := csv.NewReader(r)
	// Do not require every row to have the same number of fields as the first
	// one; non-card rows are skipped below and card rows are length-checked.
	rows.FieldsPerRecord = -1

	// llama.cpp or ROCm don't seem to understand splitting the VRAM allocations
	// across cards properly, so pick the card with the most free memory instead
	// of summing them. FIXME.
	var (
		biggestFree int64
		biggestCard string
	)
	for {
		record, err := rows.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, "", fmt.Errorf("failed to parse available VRAM: %w", err)
		}
		if len(record) == 0 || !strings.HasPrefix(record[0], "card") {
			continue
		}
		// Guard the column accesses below: a truncated card row would
		// otherwise panic with an index-out-of-range.
		if len(record) < 3 {
			return 0, "", fmt.Errorf("failed to parse available VRAM: device %q has %d fields, want at least 3", record[0], len(record))
		}

		cardTotal, err := strconv.ParseInt(strings.TrimSpace(record[1]), 10, 64)
		if err != nil {
			return 0, "", fmt.Errorf("parsing total VRAM of device %q: %w", record[0], err)
		}
		cardUsed, err := strconv.ParseInt(strings.TrimSpace(record[2]), 10, 64)
		if err != nil {
			return 0, "", fmt.Errorf("parsing used VRAM of device %q: %w", record[0], err)
		}

		possible := cardTotal - cardUsed
		log.Printf("ROCm found %d MiB of available VRAM on device %q", possible/1024/1024, record[0])
		if possible > biggestFree {
			biggestFree = possible
			biggestCard = record[0]
		}
	}
	return biggestFree, biggestCard, nil
}