gpu.go 2.09 KB
Newer Older
1
package discover
2
3

import (
4
	"log/slog"
5
	"os"
6
	"regexp"
7
	"runtime"
8
	"strconv"
9
	"strings"
10
	"time"
Michael Yang's avatar
Michael Yang committed
11

12
	"github.com/ollama/ollama/logutil"
13
	"github.com/ollama/ollama/ml"
14
15
)

16
17
18
// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once at package initialization. Jetson devices ship with
// JETSON_JETPACK="x.y.z" factory set to the installed Jetpack version; it is
// kept here to drive logic that reduces Ollama-allocated overhead on
// L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")
19

20
21
// GetSystemInfo returns the last cached state of the GPUs on the system
func GetSystemInfo() ml.SystemInfo {
22
23
24
25
26
27
	logutil.Trace("performing CPU discovery")
	startDiscovery := time.Now()
	defer func() {
		logutil.Trace("CPU discovery completed", "duration", time.Since(startDiscovery))
	}()

28
	memInfo, err := GetCPUMem()
29
30
	if err != nil {
		slog.Warn("error looking up system memory", "error", err)
31
	}
32
33
34
35
	var threadCount int
	cpus := GetCPUDetails()
	for _, c := range cpus {
		threadCount += c.CoreCount - c.EfficiencyCoreCount
36
37
	}

38
39
40
	if threadCount == 0 {
		// Fall back to Go's num CPU
		threadCount = runtime.NumCPU()
41
42
	}

43
44
45
46
47
	return ml.SystemInfo{
		ThreadCount: threadCount,
		TotalMemory: memInfo.TotalMemory,
		FreeMemory:  memInfo.FreeMemory,
		FreeSwap:    memInfo.FreeSwap,
48
49
	}
}
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

// tegraReleaseRe captures the run of digits that follows " R" (and precedes a
// space) in the contents of /etc/nv_tegra_release. It is compiled once at
// package scope instead of on every cudaJetpack call.
var tegraReleaseRe = regexp.MustCompile(` R(\d+) `)

// cudaJetpack returns the Jetpack variant name ("jetpack" followed by the
// major version) for the current host, or "" when none can be determined.
//
// Detection only runs on linux/arm64. If CudaTegra (JETSON_JETPACK) is set,
// the text before its first "." is used as the major version verbatim.
// Otherwise the L4T release number is read from /etc/nv_tegra_release and
// mapped to a Jetpack major version; unreadable, unparsable or unrecognized
// releases yield "".
func cudaJetpack() string {
	if runtime.GOARCH != "arm64" || runtime.GOOS != "linux" {
		return ""
	}

	if CudaTegra != "" {
		// Only the leading component of "x.y.z" selects the variant.
		major, _, _ := strings.Cut(CudaTegra, ".")
		return "jetpack" + major
	}

	data, err := os.ReadFile("/etc/nv_tegra_release")
	if err != nil {
		// Best-effort probe: without the release file there is nothing to detect.
		return ""
	}

	m := tegraReleaseRe.FindSubmatch(data)
	if len(m) != 2 {
		slog.Info("Unexpected format for /etc/nv_tegra_release.  Set JETSON_JETPACK to select version")
		return ""
	}

	l4t, err := strconv.Atoi(string(m[1]))
	if err != nil {
		// The pattern only admits digits, so this can only be an out-of-range value.
		slog.Debug("unable to parse L4T version", "value", string(m[1]), "error", err)
		return ""
	}

	// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
	// https://developer.nvidia.com/embedded/jetpack-archive
	switch l4t {
	case 35:
		return "jetpack5"
	case 36:
		return "jetpack6"
	default:
		// Newer Jetson systems use the SBSU runtime
		slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
	}
	return ""
}