Commit 0ce8bcfd authored by xuxzh1

init

parent b0135f4b
@@ -12,7 +12,7 @@ countries = [
    "France",
]
country = random.choice(countries)
- model = "llama3"
+ model = "llama3.1"
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
...
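For orientation, here is a minimal sketch in Go (the language of the surrounding repository) of the request this example sends to a locally running Ollama server. The `/api/generate` endpoint and its `model`, `prompt`, `format`, and `stream` fields are the documented API; the struct and prompt text below are illustrative only, not taken from this repository.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// generateRequest mirrors the fields the Python example sends; the type name is ours.
type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Format string `json:"format"` // ask the server to constrain output to JSON
	Stream bool   `json:"stream"`
}

func main() {
	body, _ := json.Marshal(generateRequest{
		Model:  "llama3.1",
		Prompt: "generate one realistically believable sample data set of a persons first name, last name, address, and phone number. Respond using JSON.",
		Format: "json",
		Stream: false,
	})
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// With stream disabled, the server returns one JSON object whose "response" field holds the text.
	var out struct {
		Response string `json:"response"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Response)
}
```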
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
## Running the Example

- 1. Ensure you have the `llama3` model installed:
+ 1. Ensure you have the `llama3.1` model installed:

   ```bash
-    ollama pull llama3
+    ollama pull llama3.1
   ```

2. Install the Python Requirements.
...
@@ -2,7 +2,7 @@ import json
import requests

# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
- model = "llama3" # TODO: update this for whatever model you wish to use
+ model = "llama3.1" # TODO: update this for whatever model you wish to use

def chat(messages):
...
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
## Running the Example

- 1. Ensure you have the `llama3` model installed:
+ 1. Ensure you have the `llama3.1` model installed:

   ```bash
-    ollama pull llama3
+    ollama pull llama3.1
   ```

2. Install the Python Requirements.
...
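Since this README describes the chat endpoint, here is a rough Go sketch of the same streaming `/api/chat` call the Python client makes. The endpoint, the `messages` array, and the streamed one-JSON-object-per-chunk response with a final `done` flag follow the documented API; the type names and prompt are illustrative only.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type chatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
	Stream   bool          `json:"stream"`
}

func main() {
	body, _ := json.Marshal(chatRequest{
		Model:    "llama3.1",
		Messages: []chatMessage{{Role: "user", Content: "Why is the sky blue?"}},
		Stream:   true,
	})
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The streaming response is a sequence of JSON objects; print tokens until "done" is true.
	dec := json.NewDecoder(resp.Body)
	for {
		var chunk struct {
			Message chatMessage `json:"message"`
			Done    bool        `json:"done"`
		}
		if err := dec.Decode(&chunk); err != nil {
			break
		}
		fmt.Print(chunk.Message.Content)
		if chunk.Done {
			break
		}
	}
	fmt.Println()
}
```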
import * as readline from "readline";

- const model = "llama3";
+ const model = "llama3.1";

type Message = {
  role: "assistant" | "user" | "system";
  content: string;
...
@@ -3,6 +3,7 @@ package format
import (
    "fmt"
    "math"
+   "strconv"
)

const (
@@ -28,6 +29,6 @@ func HumanNumber(b uint64) string {
    case b >= Thousand:
        return fmt.Sprintf("%.0fK", float64(b)/Thousand)
    default:
-       return fmt.Sprintf("%d", b)
+       return strconv.FormatUint(b, 10)
    }
}
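A quick sanity check of the behavior this hunk preserves: for the default branch, `strconv.FormatUint(b, 10)` produces exactly the same digits as `fmt.Sprintf("%d", b)`, just without going through fmt's formatting machinery. A small sketch (the K-suffix line assumes the Thousand case shown above; the surrounding M/B cases are not visible in this hunk):

```go
package main

import (
	"fmt"
	"strconv"
)

func main() {
	// Default branch: both forms print the bare digits.
	var b uint64 = 999
	fmt.Println(fmt.Sprintf("%d", b) == strconv.FormatUint(b, 10)) // true

	// Thousand branch from the hunk, for reference: 8192 -> "8K".
	fmt.Println(fmt.Sprintf("%.0fK", float64(8192)/1000)) // 8K
}
```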
@@ -5,7 +5,6 @@ import (
)

func TestHumanNumber(t *testing.T) {
    type testCase struct {
        input    uint64
        expected string
...
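Only the test-case struct is visible in this hunk. A hypothetical example of how such cases could be exercised (assuming the test lives in the `format` package; the values below are chosen to match the formatting rules shown in `HumanNumber`, not copied from the actual test file):

```go
func TestHumanNumberExamples(t *testing.T) {
	cases := []struct {
		input    uint64
		expected string
	}{
		{input: 999, expected: "999"}, // default branch
		{input: 8192, expected: "8K"}, // Thousand branch, %.0f rounding
	}
	for _, c := range cases {
		if got := HumanNumber(c.input); got != c.expected {
			t.Errorf("HumanNumber(%d) = %q, want %q", c.input, got, c.expected)
		}
	}
}
```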
@@ -16,7 +16,9 @@ require (
)

require (
+   github.com/agnivade/levenshtein v1.1.1
    github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
+   github.com/google/go-cmp v0.6.0
    github.com/mattn/go-runewidth v0.0.14
    github.com/nlpodyssey/gopickle v0.3.0
    github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
@@ -70,7 +72,7 @@ require (
    golang.org/x/net v0.25.0 // indirect
    golang.org/x/sys v0.20.0
    golang.org/x/term v0.20.0
-   golang.org/x/text v0.15.0 // indirect
+   golang.org/x/text v0.15.0
    google.golang.org/protobuf v1.34.1
    gopkg.in/yaml.v3 v3.0.1 // indirect
)
@@ -4,10 +4,14 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7
gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+ github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8=
+ github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6ICHXqG5hm0ZW5IHyeEJXoIJSOZeBLmWPNeIQ=
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
+ github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
+ github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
@@ -36,6 +40,8 @@ github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLc
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+ github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
+ github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
...
@@ -3,7 +3,7 @@
package gpu

import (
-   "fmt"
+   "errors"
    "log/slog"
    "os"
    "path/filepath"
@@ -49,9 +49,17 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
}

func commonAMDValidateLibDir() (string, error) {
-   // We try to favor system paths first, so that we can wire up the subprocess to use
-   // the system version. Only use our bundled version if the system version doesn't work
-   // This gives users a more recovery options if versions have subtle problems at runtime
+   // Favor our bundled version
+
+   // Installer payload location if we're running the installed binary
+   exe, err := os.Executable()
+   if err == nil {
+       rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
+       if rocmLibUsable(rocmTargetDir) {
+           slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
+           return rocmTargetDir, nil
+       }
+   }

    // Prefer explicit HIP env var
    hipPath := os.Getenv("HIP_PATH")
@@ -87,14 +95,5 @@ func commonAMDValidateLibDir() (string, error) {
        }
    }

-   // Installer payload location if we're running the installed binary
-   exe, err := os.Executable()
-   if err == nil {
-       rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
-       if rocmLibUsable(rocmTargetDir) {
-           slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
-           return rocmTargetDir, nil
-       }
-   }
-   return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+   return "", errors.New("no suitable rocm found, falling back to CPU")
}
package gpu

import (
+   "errors"
    "fmt"
    "log/slog"
    "syscall"
@@ -33,9 +34,10 @@ type HipLib struct {
}

func NewHipLib() (*HipLib, error) {
-   h, err := windows.LoadLibrary("amdhip64.dll")
+   // At runtime we depend on v6, so discover GPUs with the same library for a consistent set of GPUs
+   h, err := windows.LoadLibrary("amdhip64_6.dll")
    if err != nil {
-       return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err)
+       return nil, fmt.Errorf("unable to load amdhip64_6.dll, please make sure to upgrade to the latest amd driver: %w", err)
    }
    hl := &HipLib{}
    hl.dll = h
@@ -75,7 +77,7 @@ func (hl *HipLib) Release() {
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
    if hl.dll == 0 {
-       return 0, 0, fmt.Errorf("dll has been unloaded")
+       return 0, 0, errors.New("dll has been unloaded")
    }
    var version int
    status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
@@ -84,9 +86,8 @@ func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
    }
    slog.Debug("hipDriverGetVersion", "version", version)
-   // TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
-   driverMajor = version / 1000
-   driverMinor = (version - (driverMajor * 1000)) / 10
+   driverMajor = version / 10000000
+   driverMinor = (version - (driverMajor * 10000000)) / 100000
    return driverMajor, driverMinor, nil
}
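The new divisors appear to follow HIP's version encoding (major * 10000000 + minor * 100000 + patch); the hunk itself does not show that constant, so treat it as an assumption. A worked example with a made-up version value:

```go
package main

import "fmt"

func main() {
	// Hypothetical value returned by hipDriverGetVersion for a 6.1.x runtime:
	// 6*10000000 + 1*100000 + 40093 = 60140093
	version := 60140093
	driverMajor := version / 10000000
	driverMinor := (version - (driverMajor * 10000000)) / 100000
	fmt.Println(driverMajor, driverMinor) // 6 1
}
```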
@@ -110,7 +111,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
func (hl *HipLib) HipSetDevice(device int) error {
    if hl.dll == 0 {
-       return fmt.Errorf("dll has been unloaded")
+       return errors.New("dll has been unloaded")
    }
    status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
    if status != hipSuccess {
@@ -121,7 +122,7 @@ func (hl *HipLib) HipSetDevice(device int) error {
func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
    if hl.dll == 0 {
-       return nil, fmt.Errorf("dll has been unloaded")
+       return nil, errors.New("dll has been unloaded")
    }
    var props hipDevicePropMinimal
    status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
@@ -134,7 +135,7 @@ func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, err
// free, total, err
func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
    if hl.dll == 0 {
-       return 0, 0, fmt.Errorf("dll has been unloaded")
+       return 0, 0, errors.New("dll has been unloaded")
    }
    var totalMemory uint64
    var freeMemory uint64
...
@@ -10,9 +10,11 @@ import (
    "path/filepath"
    "regexp"
    "slices"
+   "sort"
    "strconv"
    "strings"

+   "github.com/ollama/ollama/envconfig"
    "github.com/ollama/ollama/format"
)
@@ -20,25 +22,35 @@ import (
const (
    // DriverVersionFile = "/sys/module/amdgpu/version"
    DriverVersionFile     = "/sys/module/hydcu/version"
    AMDNodesSysfsDir      = "/sys/class/kfd/kfd/topology/nodes/"
    GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"

    // Prefix with the node dir
    GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
-   GPUUsedMemoryFileGlob = "mem_banks/*/used_memory"
+
+   // Direct Rendering Manager sysfs location
+   DRMDeviceDirGlob   = "/sys/class/drm/card*/device"
+   DRMTotalMemoryFile = "mem_info_vram_total"
+   DRMUsedMemoryFile  = "mem_info_vram_used"
+
+   // In hex; properties file is in decimal
+   DRMUniqueIDFile = "unique_id"
+   DRMVendorFile   = "vendor"
+   DRMDeviceFile   = "device"
)

var (
    // Used to validate if the given ROCm lib is usable
    // ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
    ROCmLibGlobs = []string{"libhipblas.so.0.1","rocblas"}
+   // RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
    RocmStandardLocations = []string{"/opt/dtk/lib", "/usr/lib64"}
)

// Gather GPU information from the amdgpu driver if any supported GPUs are detected
- func AMDGetGPUInfo() []GpuInfo {
-     resp := []GpuInfo{}
+ func AMDGetGPUInfo() []RocmGPUInfo {
+     resp := []RocmGPUInfo{}
    if !AMDDetected() {
        return resp
    }
@@ -52,9 +64,9 @@ func AMDGetGPUInfo() []GpuInfo {
    // Determine if the user has already pre-selected which GPUs to look at, then ignore the others
    var visibleDevices []string
-   hipVD := os.Getenv("HIP_VISIBLE_DEVICES")   // zero based index only
-   rocrVD := os.Getenv("ROCR_VISIBLE_DEVICES") // zero based index or UUID, but consumer cards seem to not support UUID
-   gpuDO := os.Getenv("GPU_DEVICE_ORDINAL")    // zero based index
+   hipVD := envconfig.HipVisibleDevices()   // zero based index only
+   rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
+   gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
    switch {
    // TODO is this priorty order right?
    case hipVD != "":
@@ -67,13 +79,27 @@ func AMDGetGPUInfo() []GpuInfo {
        visibleDevices = strings.Split(gpuDO, ",")
    }

-   gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+   gfxOverride := envconfig.HsaOverrideGfxVersion()
    var supported []string
    libDir := ""

    // The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
    // from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
    matches, _ := filepath.Glob(GPUPropertiesFileGlob)
+   sort.Slice(matches, func(i, j int) bool {
+       // /sys/class/kfd/kfd/topology/nodes/<number>/properties
+       a, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[i])), 10, 64)
+       if err != nil {
+           slog.Debug("parse err", "error", err, "match", matches[i])
+           return false
+       }
+       b, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[j])), 10, 64)
+       if err != nil {
+           slog.Debug("parse err", "error", err, "match", matches[i])
+           return false
+       }
+       return a < b
+   })
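The numeric sort added above matters because filepath.Glob returns paths in lexical order, where node "10" sorts before node "2". A small standalone sketch of the difference (the paths are hypothetical):

```go
package main

import (
	"fmt"
	"path/filepath"
	"sort"
	"strconv"
)

func main() {
	nodes := []string{
		"/sys/class/kfd/kfd/topology/nodes/10/properties",
		"/sys/class/kfd/kfd/topology/nodes/2/properties",
		"/sys/class/kfd/kfd/topology/nodes/1/properties",
	}
	sort.Strings(nodes) // lexical order: 1, 10, 2

	// Numeric sort on the node directory name, as in the hunk above: 1, 2, 10.
	sort.Slice(nodes, func(i, j int) bool {
		a, _ := strconv.ParseInt(filepath.Base(filepath.Dir(nodes[i])), 10, 64)
		b, _ := strconv.ParseInt(filepath.Base(filepath.Dir(nodes[j])), 10, 64)
		return a < b
	})
	fmt.Println(nodes)
}
```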
    cpuCount := 0
    for _, match := range matches {
        slog.Debug("evaluating amdgpu node " + match)
@@ -92,7 +118,7 @@ func AMDGetGPUInfo() []GpuInfo {
        scanner := bufio.NewScanner(fp)
        isCPU := false
        var major, minor, patch uint64
-       var vendor, device uint64
+       var vendor, device, uniqueID uint64
        for scanner.Scan() {
            line := strings.TrimSpace(scanner.Text())
            // Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
@@ -123,30 +149,43 @@ func AMDGetGPUInfo() []GpuInfo {
            } else if strings.HasPrefix(line, "vendor_id") {
                ver := strings.Fields(line)
                if len(ver) != 2 {
-                   slog.Debug("malformed vendor_id", "vendor_id", line)
+                   slog.Debug("malformed", "vendor_id", line)
                    continue
                }
-               vendor, err = strconv.ParseUint(ver[1], 10, 32)
+               vendor, err = strconv.ParseUint(ver[1], 10, 64)
                if err != nil {
-                   slog.Debug("malformed vendor_id" + line)
+                   slog.Debug("malformed", "vendor_id", line, "error", err)
                }
            } else if strings.HasPrefix(line, "device_id") {
                ver := strings.Fields(line)
                if len(ver) != 2 {
-                   slog.Debug("malformed device_id", "device_id", line)
+                   slog.Debug("malformed", "device_id", line)
                    continue
                }
-               device, err = strconv.ParseUint(ver[1], 10, 32)
+               device, err = strconv.ParseUint(ver[1], 10, 64)
                if err != nil {
-                   slog.Debug("malformed device_id" + line)
+                   slog.Debug("malformed", "device_id", line, "error", err)
                }
+           } else if strings.HasPrefix(line, "unique_id") {
+               ver := strings.Fields(line)
+               if len(ver) != 2 {
+                   slog.Debug("malformed", "unique_id", line)
+                   continue
+               }
+               uniqueID, err = strconv.ParseUint(ver[1], 10, 64)
+               if err != nil {
+                   slog.Debug("malformed", "unique_id", line, "error", err)
+               }
            }
            // TODO - any other properties we want to extract and record?
            // vendor_id + device_id -> pci lookup for "Name"
            // Other metrics that may help us understand relative performance between multiple GPUs
        }

+       // Note: while ./mem_banks/*/used_memory exists, it doesn't appear to take other VRAM consumers
+       // into consideration, so we instead map the device over to the DRM driver sysfs nodes which
+       // do reliably report VRAM usage.
+
        if isCPU {
            cpuCount++
            continue
@@ -158,7 +197,7 @@ func AMDGetGPUInfo() []GpuInfo {
        // Shouldn't happen, but just in case...
        if gpuID < 0 {
            slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
-           return []GpuInfo{}
+           return nil
        }
        if int(major) < RocmComputeMin {
@@ -169,65 +208,68 @@
        // Look up the memory for the current node
        totalMemory := uint64(0)
        usedMemory := uint64(0)
-       propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUTotalMemoryFileGlob)
-       propFiles, err := filepath.Glob(propGlob)
-       if err != nil {
-           slog.Warn("error looking up total GPU memory", "glob", propGlob, "error", err)
-       }
-       // 1 or more memory banks - sum the values of all of them
-       for _, propFile := range propFiles {
-           fp, err := os.Open(propFile)
-           if err != nil {
-               slog.Warn("failed to open sysfs node", "file", propFile, "erroir", err)
-               continue
-           }
-           defer fp.Close()
-           scanner := bufio.NewScanner(fp)
-           for scanner.Scan() {
-               line := strings.TrimSpace(scanner.Text())
-               if strings.HasPrefix(line, "size_in_bytes") {
-                   ver := strings.Fields(line)
-                   if len(ver) != 2 {
-                       slog.Warn("malformed " + line)
-                       continue
-                   }
-                   bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64)
-                   if err != nil {
-                       slog.Warn("malformed int " + line)
-                       continue
-                   }
-                   totalMemory += bankSizeInBytes
-               }
-           }
-       }
-       if totalMemory == 0 {
-           slog.Warn("amdgpu reports zero total memory", "gpu", gpuID)
-           continue
-       }
-       usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(nodeID), GPUUsedMemoryFileGlob)
-       usedFiles, err := filepath.Glob(usedGlob)
-       if err != nil {
-           slog.Warn("error looking up used GPU memory", "glob", usedGlob, "error", err)
-           continue
-       }
-       for _, usedFile := range usedFiles {
-           fp, err := os.Open(usedFile)
-           if err != nil {
-               slog.Warn("failed to open sysfs node", "file", usedFile, "error", err)
-               continue
-           }
-           defer fp.Close()
-           data, err := io.ReadAll(fp)
-           if err != nil {
-               slog.Warn("failed to read sysfs node", "file", usedFile, "error", err)
-               continue
-           }
-           used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
-           if err != nil {
-               slog.Warn("malformed used memory", "data", string(data), "error", err)
-               continue
-           }
-           usedMemory += used
-       }
+       var usedFile string
+       mapping := []struct {
+           id       uint64
+           filename string
+       }{
+           {vendor, DRMVendorFile},
+           {device, DRMDeviceFile},
+           {uniqueID, DRMUniqueIDFile}, // Not all devices will report this
+       }
+       slog.Debug("mapping amdgpu to drm sysfs nodes", "amdgpu", match, "vendor", vendor, "device", device, "unique_id", uniqueID)
+       // Map over to DRM location to find the total/free memory
+       drmMatches, _ := filepath.Glob(DRMDeviceDirGlob)
+       for _, devDir := range drmMatches {
+           matched := true
+           for _, m := range mapping {
+               if m.id == 0 {
+                   // Null ID means it didn't populate, so we can't use it to match
+                   continue
+               }
+               filename := filepath.Join(devDir, m.filename)
+               buf, err := os.ReadFile(filename)
+               if err != nil {
+                   slog.Debug("failed to read sysfs node", "file", filename, "error", err)
+                   matched = false
+                   break
+               }
+               // values here are in hex, strip off the lead 0x and parse so we can compare the numeric (decimal) values in amdgpu
+               cmp, err := strconv.ParseUint(strings.TrimPrefix(strings.TrimSpace(string(buf)), "0x"), 16, 64)
+               if err != nil {
+                   slog.Debug("failed to parse sysfs node", "file", filename, "error", err)
+                   matched = false
+                   break
+               }
+               if cmp != m.id {
+                   matched = false
+                   break
+               }
+           }
+           if !matched {
+               continue
+           }
+
+           // Found the matching DRM directory
+           slog.Debug("matched", "amdgpu", match, "drm", devDir)
+           totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
+           buf, err := os.ReadFile(totalFile)
+           if err != nil {
+               slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
+               break
+           }
+           totalMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
+           if err != nil {
+               slog.Debug("failed to parse sysfs node", "file", totalFile, "error", err)
+               break
+           }
+
+           usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
+           usedMemory, err = getFreeMemory(usedFile)
+           if err != nil {
+               slog.Debug("failed to update used memory", "error", err)
+           }
+           break
+       }

        // iGPU detection, remove this check once we can support an iGPU variant of the rocm library
@@ -243,18 +285,21 @@ func AMDGetGPUInfo() []GpuInfo {
        slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
        slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
-       gpuInfo := GpuInfo{
-           Library: "rocm",
-           memInfo: memInfo{
-               TotalMemory: totalMemory,
-               FreeMemory:  (totalMemory - usedMemory),
-           },
-           ID:            fmt.Sprintf("%d", gpuID),
-           Name:          name,
-           Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
-           MinimumMemory: rocmMinimumMemory,
-           DriverMajor:   driverMajor,
-           DriverMinor:   driverMinor,
+       gpuInfo := RocmGPUInfo{
+           GpuInfo: GpuInfo{
+               Library: "rocm",
+               memInfo: memInfo{
+                   TotalMemory: totalMemory,
+                   FreeMemory:  (totalMemory - usedMemory),
+               },
+               ID:            strconv.Itoa(gpuID),
+               Name:          name,
+               Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
+               MinimumMemory: rocmMinimumMemory,
+               DriverMajor:   driverMajor,
+               DriverMinor:   driverMinor,
+           },
+           usedFilepath: usedFile,
        }
        // If the user wants to filter to a subset of devices, filter out if we aren't a match
@@ -278,7 +323,7 @@ func AMDGetGPUInfo() []GpuInfo {
            libDir, err = AMDValidateLibDir()
            if err != nil {
                slog.Warn("unable to verify rocm library, will use cpu", "error", err)
-               return []GpuInfo{}
+               return nil
            }
        }
        gpuInfo.DependencyPath = libDir
@@ -289,7 +334,7 @@ func AMDGetGPUInfo() []GpuInfo {
            supported, err = GetSupportedGFX(libDir)
            if err != nil {
                slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
-               return []GpuInfo{}
+               return nil
            }
            slog.Debug("rocm supported GPUs", "types", supported)
        }
@@ -306,6 +351,11 @@ func AMDGetGPUInfo() []GpuInfo {
            slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
        }

+       // Check for env var workarounds
+       if name == "1002:687f" { // Vega RX 56
+           gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"})
+       }
+
        // The GPU has passed all the verification steps and is supported
        resp = append(resp, gpuInfo)
    }
@@ -346,7 +396,7 @@ func AMDValidateLibDir() (string, error) {
    // If we still haven't found a usable rocm, the user will have to install it on their own
    slog.Warn("amdgpu detected, but no compatible rocm library found. Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
-   return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+   return "", errors.New("no suitable rocm found, falling back to CPU")
}

func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
@@ -380,3 +430,31 @@ func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
    }
    return driverMajor, driverMinor, nil
}
+
+ func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
+     if len(gpus) == 0 {
+         return nil
+     }
+     for i := range gpus {
+         usedMemory, err := getFreeMemory(gpus[i].usedFilepath)
+         if err != nil {
+             return err
+         }
+         slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(gpus[i].TotalMemory-usedMemory))
+         gpus[i].FreeMemory = gpus[i].TotalMemory - usedMemory
+     }
+     return nil
+ }
+
+ func getFreeMemory(usedFile string) (uint64, error) {
+     buf, err := os.ReadFile(usedFile)
+     if err != nil {
+         return 0, fmt.Errorf("failed to read sysfs node %s %w", usedFile, err)
+     }
+     usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
+     if err != nil {
+         slog.Debug("failed to parse sysfs node", "file", usedFile, "error", err)
+         return 0, fmt.Errorf("failed to parse sysfs node %s %w", usedFile, err)
+     }
+     return usedMemory, nil
+ }
@@ -2,13 +2,15 @@ package gpu
import (
    "bytes"
-   "fmt"
+   "errors"
    "log/slog"
    "os"
    "path/filepath"
    "slices"
+   "strconv"
    "strings"

+   "github.com/ollama/ollama/envconfig"
    "github.com/ollama/ollama/format"
)
@@ -20,12 +22,12 @@ const (
var (
    // Used to validate if the given ROCm lib is usable
-   ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // TODO - probably include more coverage of files here...
-   RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
+   ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // This is not sufficient to discern v5 vs v6
+   RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
)

- func AMDGetGPUInfo() []GpuInfo {
-     resp := []GpuInfo{}
+ func AMDGetGPUInfo() []RocmGPUInfo {
+     resp := []RocmGPUInfo{}
    hl, err := NewHipLib()
    if err != nil {
        slog.Debug(err.Error())
@@ -33,12 +35,11 @@ func AMDGetGPUInfo() []GpuInfo {
    }
    defer hl.Release()

-   // TODO - this reports incorrect version information, so omitting for now
-   // driverMajor, driverMinor, err := hl.AMDDriverVersion()
-   // if err != nil {
-   //     // For now this is benign, but we may eventually need to fail compatibility checks
-   //     slog.Debug("error looking up amd driver version", "error", err)
-   // }
+   driverMajor, driverMinor, err := hl.AMDDriverVersion()
+   if err != nil {
+       // For now this is benign, but we may eventually need to fail compatibility checks
+       slog.Debug("error looking up amd driver version", "error", err)
+   }
    // Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
    count := hl.HipGetDeviceCount()
@@ -52,7 +53,7 @@ func AMDGetGPUInfo() []GpuInfo {
    }
    var supported []string
-   gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+   gfxOverride := envconfig.HsaOverrideGfxVersion()
    if gfxOverride == "" {
        supported, err = GetSupportedGFX(libDir)
        if err != nil {
@@ -65,7 +66,7 @@ func AMDGetGPUInfo() []GpuInfo {
    slog.Debug("detected hip devices", "count", count)
    // TODO how to determine the underlying device ID when visible devices is causing this to subset?
-   for i := 0; i < count; i++ {
+   for i := range count {
        err = hl.HipSetDevice(i)
        if err != nil {
            slog.Warn("set device", "id", i, "error", err)
@@ -84,14 +85,15 @@ func AMDGetGPUInfo() []GpuInfo {
        n = bytes.IndexByte(props.GcnArchName[:], 0)
        gfx := string(props.GcnArchName[:n])
        slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
-       //slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
+       // slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
        // TODO Why isn't props.iGPU accurate!?
        if strings.EqualFold(name, iGPUName) {
            slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
            continue
        }
        if gfxOverride == "" {
-           if !slices.Contains[[]string, string](supported, gfx) {
+           // Strip off Target Features when comparing
+           if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
                slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
                // TODO - consider discrete markdown just for ROCM troubleshooting?
                slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
@@ -113,25 +115,27 @@ func AMDGetGPUInfo() []GpuInfo {
            continue
        }
-       // TODO revisit this once ROCm v6 is available on windows.
-       // v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
        slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
        slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
-       gpuInfo := GpuInfo{
-           Library: "rocm",
-           memInfo: memInfo{
-               TotalMemory: totalMemory,
-               FreeMemory:  freeMemory,
-           },
-           ID:             fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
-           DependencyPath: libDir,
-           MinimumMemory:  rocmMinimumMemory,
-           Name:           name,
-           Compute:        gfx,
-           // TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
-           // DriverMajor: driverMajor,
-           // DriverMinor: driverMinor,
+       gpuInfo := RocmGPUInfo{
+           GpuInfo: GpuInfo{
+               Library: "rocm",
+               memInfo: memInfo{
+                   TotalMemory: totalMemory,
+                   FreeMemory:  freeMemory,
+               },
+               // Free memory reporting on Windows is not reliable until we bump to ROCm v6.2
+               UnreliableFreeMemory: true,
+               ID:             strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
+               DependencyPath: libDir,
+               MinimumMemory:  rocmMinimumMemory,
+               Name:           name,
+               Compute:        gfx,
+               DriverMajor:    driverMajor,
+               DriverMinor:    driverMinor,
+           },
+           index: i,
        }

        resp = append(resp, gpuInfo)
@@ -157,5 +161,32 @@ func AMDValidateLibDir() (string, error) {
    // Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
    slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
-   return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+   return "", errors.New("no suitable rocm found, falling back to CPU")
}
+
+ func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
+     if len(gpus) == 0 {
+         return nil
+     }
+     hl, err := NewHipLib()
+     if err != nil {
+         slog.Debug(err.Error())
+         return nil
+     }
+     defer hl.Release()
+
+     for i := range gpus {
+         err := hl.HipSetDevice(gpus[i].index)
+         if err != nil {
+             return err
+         }
+         freeMemory, _, err := hl.HipMemGetInfo()
+         if err != nil {
+             slog.Warn("get mem info", "id", i, "error", err)
+             continue
+         }
+         slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
+         gpus[i].FreeMemory = freeMemory
+     }
+     return nil
+ }
@@ -26,7 +26,7 @@ func PayloadsDir() (string, error) {
    defer lock.Unlock()
    var err error
    if payloadsDir == "" {
-       runnersDir := envconfig.RunnersDir
+       runnersDir := envconfig.RunnersDir()
        if runnersDir != "" {
            payloadsDir = runnersDir
@@ -35,27 +35,23 @@
        // The remainder only applies on non-windows where we still carry payloads in the main executable
        cleanupTmpDirs()
-       tmpDir := envconfig.TmpDir
+       tmpDir := envconfig.TmpDir()
        if tmpDir == "" {
            tmpDir, err = os.MkdirTemp("", "ollama")
            if err != nil {
                return "", fmt.Errorf("failed to generate tmp dir: %w", err)
            }
        } else {
-           err = os.MkdirAll(tmpDir, 0755)
+           err = os.MkdirAll(tmpDir, 0o755)
            if err != nil {
                return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
            }
        }

        // Track our pid so we can clean up orphaned tmpdirs
-       pidFilePath := filepath.Join(tmpDir, "ollama.pid")
-       pidFile, err := os.OpenFile(pidFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
-       if err != nil {
-           return "", err
-       }
-       if _, err := pidFile.Write([]byte(fmt.Sprint(os.Getpid()))); err != nil {
-           return "", err
+       n := filepath.Join(tmpDir, "ollama.pid")
+       if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
+           return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
        }

        // We create a distinct subdirectory for payloads within the tmpdir
@@ -67,30 +63,44 @@
// Best effort to clean up prior tmpdirs
func cleanupTmpDirs() {
-   dirs, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
+   matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
    if err != nil {
        return
    }
-   for _, d := range dirs {
-       info, err := os.Stat(d)
-       if err != nil || !info.IsDir() {
+
+   for _, match := range matches {
+       raw, err := os.ReadFile(match)
+       if errors.Is(err, os.ErrNotExist) {
+           slog.Debug("not a ollama runtime directory, skipping", "path", match)
+           continue
+       } else if err != nil {
+           slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
            continue
        }
-       raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
-       if err == nil {
-           pid, err := strconv.Atoi(string(raw))
-           if err == nil {
-               if proc, err := os.FindProcess(int(pid)); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
-                   // Another running ollama, ignore this tmpdir
-                   continue
-               }
-           }
-       } else {
-           slog.Debug("failed to open ollama.pid", "path", d, "error", err)
-       }
-       err = os.RemoveAll(d)
+
+       pid, err := strconv.Atoi(string(raw))
        if err != nil {
-           slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
+           slog.Warn("invalid pid, skipping", "path", match, "error", err)
+           continue
+       }
+
+       p, err := os.FindProcess(pid)
+       if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
+           slog.Warn("process still running, skipping", "pid", pid, "path", match)
+           continue
+       }
+
+       if err := os.Remove(match); err != nil {
+           slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
+       }
+
+       runners := filepath.Join(filepath.Dir(match), "runners")
+       if err := os.RemoveAll(runners); err != nil {
+           slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
+       }
+
+       if err := os.Remove(filepath.Dir(match)); err != nil {
+           slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
        }
    }
}
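The liveness test in the new loop relies on the conventional signal-0 probe: sending signal 0 performs the existence/permission checks without actually delivering a signal. A minimal standalone sketch of that pattern (Unix semantics, illustrative only):

```go
package main

import (
	"errors"
	"fmt"
	"os"
	"syscall"
)

// isAlive reports whether pid appears to belong to a running process.
func isAlive(pid int) bool {
	p, err := os.FindProcess(pid) // always succeeds on Unix, even for dead pids
	if err != nil {
		return false
	}
	// Signal 0 delivers nothing but still checks that the process exists.
	return !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone)
}

func main() {
	fmt.Println(isAlive(os.Getpid())) // true for our own pid
}
```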
@@ -98,7 +108,7 @@ func cleanupTmpDirs() {
func Cleanup() {
    lock.Lock()
    defer lock.Unlock()
-   runnersDir := envconfig.RunnersDir
+   runnersDir := envconfig.RunnersDir()
    if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
        // We want to fully clean up the tmpdir parent of the payloads dir
        tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
...
package gpu

import (
-   "log/slog"
+   "os"
+   "path/filepath"
+   "runtime"
+   "strings"

    "golang.org/x/sys/cpu"
)

- func GetCPUVariant() string {
+ func GetCPUCapability() CPUCapability {
    if cpu.X86.HasAVX2 {
-       slog.Debug("CPU has AVX2")
-       return "avx2"
+       return CPUCapabilityAVX2
    }
    if cpu.X86.HasAVX {
-       slog.Debug("CPU has AVX")
-       return "avx"
+       return CPUCapabilityAVX
    }
-   slog.Debug("CPU does not have vector extensions")
    // else LCD
-   return ""
+   return CPUCapabilityNone
}
+
+ func IsNUMA() bool {
+     if runtime.GOOS != "linux" {
+         // numa support in llama.cpp is linux only
+         return false
+     }
+     ids := map[string]interface{}{}
+     packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
+     for _, packageId := range packageIds {
+         id, err := os.ReadFile(packageId)
+         if err == nil {
+             ids[strings.TrimSpace(string(id))] = struct{}{}
+         }
+     }
+     return len(ids) > 1
+ }
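GetCPUCapability now returns a CPUCapability value whose definition is not part of this diff. A plausible sketch of that type, for orientation only (the real definitions live elsewhere in the gpu package and may differ), is an ordered enum so capabilities can be compared with `<`, as the discovery code does:

```go
// Hypothetical sketch; not copied from this repository.
type CPUCapability uint32

const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// GPURunnerCPUCapability would be the minimum level the GPU runners are built with.
	GPURunnerCPUCapability = CPUCapabilityAVX
)

func (c CPUCapability) String() string {
	switch c {
	case CPUCapabilityAVX:
		return "avx"
	case CPUCapabilityAVX2:
		return "avx2"
	default:
		return "no vector extensions"
	}
}
```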
@@ -18,5 +18,4 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
        ids = append(ids, info.ID)
    }
    return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
}
@@ -7,9 +7,9 @@ package gpu
#cgo windows LDFLAGS: -lpthread
#include "gpu_info.h"
*/
import "C"

import (
    "fmt"
    "log/slog"
@@ -24,19 +24,37 @@ import (
    "github.com/ollama/ollama/format"
)

- type handles struct {
+ type cudaHandles struct {
    deviceCount int
    cudart      *C.cudart_handle_t
    nvcuda      *C.nvcuda_handle_t
+   nvml        *C.nvml_handle_t
+ }
+
+ type oneapiHandles struct {
    oneapi      *C.oneapi_handle_t
+   deviceCount int
}

const (
    cudaMinimumMemory = 457 * format.MebiByte
    rocmMinimumMemory = 457 * format.MebiByte
+   // TODO OneAPI minimum memory
)

- var gpuMutex sync.Mutex
+ var (
+     gpuMutex      sync.Mutex
+     bootstrapped  bool
+     cpuCapability CPUCapability
+     cpus          []CPUInfo
+     cudaGPUs      []CudaGPUInfo
+     nvcudaLibPath string
+     cudartLibPath string
+     oneapiLibPath string
+     nvmlLibPath   string
+     rocmGPUs      []RocmGPUInfo
+     oneapiGPUs    []OneapiGPUInfo
+ )

// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
@@ -46,113 +64,112 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU

- var CudartLinuxGlobs = []string{
-     "/usr/local/cuda/lib64/libcudart.so*",
-     "/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
-     "/usr/lib/x86_64-linux-gnu/libcudart.so*",
-     "/usr/lib/wsl/lib/libcudart.so*",
-     "/usr/lib/wsl/drivers/*/libcudart.so*",
-     "/opt/cuda/lib64/libcudart.so*",
-     "/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
-     "/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
-     "/usr/lib/aarch64-linux-gnu/libcudart.so*",
-     "/usr/local/cuda/lib*/libcudart.so*",
-     "/usr/lib*/libcudart.so*",
-     "/usr/local/lib*/libcudart.so*",
- }
-
- var CudartWindowsGlobs = []string{
-     "c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
- }
-
- var NvcudaLinuxGlobs = []string{
-     "/usr/local/cuda*/targets/*/lib/libcuda.so*",
-     "/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
-     "/usr/lib/*-linux-gnu/libcuda.so*",
-     "/usr/lib/wsl/lib/libcuda.so*",
-     "/usr/lib/wsl/drivers/*/libcuda.so*",
-     "/opt/cuda/lib*/libcuda.so*",
-     "/usr/local/cuda/lib*/libcuda.so*",
-     "/usr/lib*/libcuda.so*",
-     "/usr/local/lib*/libcuda.so*",
- }
-
- var NvcudaWindowsGlobs = []string{
-     "c:\\windows\\system*\\nvcuda.dll",
- }
-
- var OneapiWindowsGlobs = []string{
-     "c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
- }
-
- var OneapiLinuxGlobs = []string{
-     "/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
-     "/usr/lib*/libze_intel_gpu.so*",
- }
-
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held
- func initGPUHandles() *handles {
+ func initCudaHandles() *cudaHandles {
    // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

-   gpuHandles := &handles{}
-   var cudartMgmtName string
+   cHandles := &cudaHandles{}
+   // Short Circuit if we already know which library to use
+   if nvmlLibPath != "" {
+       cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
+       return cHandles
+   }
+   if nvcudaLibPath != "" {
+       cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
+       return cHandles
+   }
+   if cudartLibPath != "" {
+       cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
+       return cHandles
+   }
+
+   slog.Debug("searching for GPU discovery libraries for NVIDIA")
    var cudartMgmtPatterns []string
-   var nvcudaMgmtName string
-   var nvcudaMgmtPatterns []string
-
-   tmpDir, _ := PayloadsDir()
-   switch runtime.GOOS {
-   case "windows":
-       cudartMgmtName = "cudart64_*.dll"
-       localAppData := os.Getenv("LOCALAPPDATA")
-       cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
-       cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
-       // Aligned with driver, we can't carry as payloads
-       nvcudaMgmtName = "nvcuda.dll"
-       nvcudaMgmtPatterns = NvcudaWindowsGlobs
-   case "linux":
-       cudartMgmtName = "libcudart.so*"
-       if tmpDir != "" {
-           // TODO - add "payloads" for subprocess
-           cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
-       }
-       cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
-       // Aligned with driver, we can't carry as payloads
-       nvcudaMgmtName = "libcuda.so*"
-       nvcudaMgmtPatterns = NvcudaLinuxGlobs
-   default:
-       return gpuHandles
-   }
+
+   // Aligned with driver, we can't carry as payloads
+   nvcudaMgmtPatterns := NvcudaGlobs
+
+   if runtime.GOOS == "windows" {
+       localAppData := os.Getenv("LOCALAPPDATA")
+       cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
+   }
+   tmpDir, _ := PayloadsDir()
+   if tmpDir != "" {
+       // TODO - add "payloads" for subprocess
+       cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
+   }
+   cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
+
+   if len(NvmlGlobs) > 0 {
+       nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
+       if len(nvmlLibPaths) > 0 {
+           nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
+           if nvml != nil {
+               slog.Debug("nvidia-ml loaded", "library", libPath)
+               cHandles.nvml = nvml
+               nvmlLibPath = libPath
+           }
+       }
+   }

-   slog.Debug("Detecting GPUs")
-   nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
+   nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
    if len(nvcudaLibPaths) > 0 {
        deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
        if nvcuda != nil {
            slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
-           gpuHandles.nvcuda = nvcuda
-           gpuHandles.deviceCount = deviceCount
-           return gpuHandles
+           cHandles.nvcuda = nvcuda
+           cHandles.deviceCount = deviceCount
+           nvcudaLibPath = libPath
+           return cHandles
        }
    }

-   cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
+   cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
    if len(cudartLibPaths) > 0 {
        deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
        if cudart != nil {
            slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
-           gpuHandles.cudart = cudart
-           gpuHandles.deviceCount = deviceCount
-           return gpuHandles
+           cHandles.cudart = cudart
+           cHandles.deviceCount = deviceCount
+           cudartLibPath = libPath
+           return cHandles
        }
    }

-   return gpuHandles
+   return cHandles
}

+ // Note: gpuMutex must already be held
+ func initOneAPIHandles() *oneapiHandles {
+     oHandles := &oneapiHandles{}
+
+     // Short Circuit if we already know which library to use
+     if oneapiLibPath != "" {
+         oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
+         return oHandles
+     }
+
+     oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
+     if len(oneapiLibPaths) > 0 {
+         oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
+     }
+
+     return oHandles
+ }
+
+ func GetCPUInfo() GpuInfoList {
+     gpuMutex.Lock()
+     if !bootstrapped {
+         gpuMutex.Unlock()
+         GetGPUInfo()
+     } else {
+         gpuMutex.Unlock()
+     }
+     return GpuInfoList{cpus[0].GpuInfo}
+ }
func GetGPUInfo() GpuInfoList {
@@ -160,110 +177,291 @@ func GetGPUInfo() GpuInfoList {
  // GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
  gpuMutex.Lock()
  defer gpuMutex.Unlock()
  needRefresh := true
  var cHandles *cudaHandles
  var oHandles *oneapiHandles
  defer func() {
    if cHandles != nil {
      if cHandles.cudart != nil {
        C.cudart_release(*cHandles.cudart)
      }
      if cHandles.nvcuda != nil {
        C.nvcuda_release(*cHandles.nvcuda)
      }
      if cHandles.nvml != nil {
        C.nvml_release(*cHandles.nvml)
      }
    }
    if oHandles != nil {
      if oHandles.oneapi != nil {
        // TODO - is this needed?
        C.oneapi_release(*oHandles.oneapi)
      }
    }
  }()

  if !bootstrapped {
    slog.Info("looking for compatible GPUs")
    needRefresh = false
    cpuCapability = GetCPUCapability()
    var memInfo C.mem_info_t

    mem, err := GetCPUMem()
    if err != nil {
      slog.Warn("error looking up system memory", "error", err)
    }
    cpus = []CPUInfo{
      {
        GpuInfo: GpuInfo{
          memInfo: mem,
          Library: "cpu",
          Variant: cpuCapability,
          ID:      "0",
        },
      },
    }

    // Fallback to CPU mode if we're lacking required vector extensions on x86
    if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
      slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
      bootstrapped = true
      // No need to do any GPU discovery, since we can't run on them
      return GpuInfoList{cpus[0].GpuInfo}
    }

    // On windows we bundle the nvidia library one level above the runner dir
    depPath := ""
    if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
      depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda")
    }

    // Load ALL libraries
    cHandles = initCudaHandles()

    // NVIDIA
    for i := range cHandles.deviceCount {
      if cHandles.cudart != nil || cHandles.nvcuda != nil {
        gpuInfo := CudaGPUInfo{
          GpuInfo: GpuInfo{
            Library: "cuda",
          },
          index: i,
        }
        var driverMajor int
        var driverMinor int
        if cHandles.cudart != nil {
          C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
        } else {
          C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
          driverMajor = int(cHandles.nvcuda.driver_major)
          driverMinor = int(cHandles.nvcuda.driver_minor)
        }
        if memInfo.err != nil {
          slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
          C.free(unsafe.Pointer(memInfo.err))
          continue
        }
        if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
          slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
          continue
        }
        gpuInfo.TotalMemory = uint64(memInfo.total)
        gpuInfo.FreeMemory = uint64(memInfo.free)
        gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
        gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
        gpuInfo.MinimumMemory = cudaMinimumMemory
        gpuInfo.DependencyPath = depPath
        gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
        gpuInfo.DriverMajor = driverMajor
        gpuInfo.DriverMinor = driverMinor

        // query the management library as well so we can record any skew between the two
        // which represents overhead on the GPU we must set aside on subsequent updates
        if cHandles.nvml != nil {
          C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
          if memInfo.err != nil {
            slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
            C.free(unsafe.Pointer(memInfo.err))
          } else {
            if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
              gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
              slog.Info("detected OS VRAM overhead",
                "id", gpuInfo.ID,
                "library", gpuInfo.Library,
                "compute", gpuInfo.Compute,
                "driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
                "name", gpuInfo.Name,
                "overhead", format.HumanBytes2(gpuInfo.OSOverhead),
              )
            }
          }
        }

        // TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
        cudaGPUs = append(cudaGPUs, gpuInfo)
      }
    }

    // Intel
    if envconfig.IntelGPU() {
      oHandles = initOneAPIHandles()
      if oHandles != nil && oHandles.oneapi != nil {
        // On windows we bundle the oneapi library one level above the runner dir
        depPath = ""
        if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
          depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi")
        }

        for d := range oHandles.oneapi.num_drivers {
          if oHandles.oneapi == nil {
            // shouldn't happen
            slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
            continue
          }
          devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
          for i := range devCount {
            gpuInfo := OneapiGPUInfo{
              GpuInfo: GpuInfo{
                Library: "oneapi",
              },
              driverIndex: int(d),
              gpuIndex:    int(i),
            }
            // TODO - split bootstrapping from updating free memory
            C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
            // TODO - convert this to MinimumMemory based on testing...
            var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
            memInfo.free = C.uint64_t(totalFreeMem)
            gpuInfo.TotalMemory = uint64(memInfo.total)
            gpuInfo.FreeMemory = uint64(memInfo.free)
            gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
            gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
            gpuInfo.DependencyPath = depPath
            oneapiGPUs = append(oneapiGPUs, gpuInfo)
          }
        }
      }
    }

    rocmGPUs = AMDGetGPUInfo()
    bootstrapped = true
    if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
      slog.Info("no compatible GPUs were discovered")
    }
  }

  // For detected GPUs, load library if not loaded

  // Refresh free memory usage
  if needRefresh {
    mem, err := GetCPUMem()
    if err != nil {
      slog.Warn("error looking up system memory", "error", err)
    } else {
      slog.Debug("updating system memory data",
        slog.Group(
          "before",
          "total", format.HumanBytes2(cpus[0].TotalMemory),
          "free", format.HumanBytes2(cpus[0].FreeMemory),
          "free_swap", format.HumanBytes2(cpus[0].FreeSwap),
        ),
        slog.Group(
          "now",
          "total", format.HumanBytes2(mem.TotalMemory),
          "free", format.HumanBytes2(mem.FreeMemory),
          "free_swap", format.HumanBytes2(mem.FreeSwap),
        ),
      )
      cpus[0].FreeMemory = mem.FreeMemory
      cpus[0].FreeSwap = mem.FreeSwap
    }

    var memInfo C.mem_info_t
    if cHandles == nil && len(cudaGPUs) > 0 {
      cHandles = initCudaHandles()
    }
    for i, gpu := range cudaGPUs {
      if cHandles.nvml != nil {
        C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
      } else if cHandles.cudart != nil {
        C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
      } else if cHandles.nvcuda != nil {
        C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
        memInfo.used = memInfo.total - memInfo.free
      } else {
        // shouldn't happen
        slog.Warn("no valid cuda library loaded to refresh vram usage")
        break
      }
      if memInfo.err != nil {
        slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
        C.free(unsafe.Pointer(memInfo.err))
        continue
      }
      if memInfo.free == 0 {
        slog.Warn("error looking up nvidia GPU memory")
        continue
      }
      if cHandles.nvml != nil && gpu.OSOverhead > 0 {
        // When using the management library update based on recorded overhead
        memInfo.free -= C.uint64_t(gpu.OSOverhead)
      }
      slog.Debug("updating cuda memory data",
        "gpu", gpu.ID,
        "name", gpu.Name,
        "overhead", format.HumanBytes2(gpu.OSOverhead),
        slog.Group(
          "before",
          "total", format.HumanBytes2(gpu.TotalMemory),
          "free", format.HumanBytes2(gpu.FreeMemory),
        ),
        slog.Group(
          "now",
          "total", format.HumanBytes2(uint64(memInfo.total)),
          "free", format.HumanBytes2(uint64(memInfo.free)),
          "used", format.HumanBytes2(uint64(memInfo.used)),
        ),
      )
      cudaGPUs[i].FreeMemory = uint64(memInfo.free)
    }

    if oHandles == nil && len(oneapiGPUs) > 0 {
      oHandles = initOneAPIHandles()
    }
    for i, gpu := range oneapiGPUs {
      if oHandles.oneapi == nil {
        // shouldn't happen
        slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
        continue
      }
      C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
      // TODO - convert this to MinimumMemory based on testing...
      var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
      memInfo.free = C.uint64_t(totalFreeMem)
      oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
    }

    err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
    if err != nil {
      slog.Debug("problem refreshing ROCm free memory", "error", err)
    }
  }

  resp := []GpuInfo{}
  for _, gpu := range cudaGPUs {
    resp = append(resp, gpu.GpuInfo)
  }
  for _, gpu := range rocmGPUs {
    resp = append(resp, gpu.GpuInfo)
  }
  for _, gpu := range oneapiGPUs {
    resp = append(resp, gpu.GpuInfo)
  }
  if len(resp) == 0 {
    resp = append(resp, cpus[0].GpuInfo)
  }
  return resp
}
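For reference, a caller can consume the GpuInfoList produced above roughly like this; logGPUs is a hypothetical illustration, while the Library, ID and memory fields and format.HumanBytes2 are taken from the code in this change:

```go
package gpu

import (
  "log/slog"

  "github.com/ollama/ollama/format"
)

// logGPUs is a hypothetical, read-only consumer of the discovery results.
// Even on a machine with no supported GPU the list contains one "cpu" entry.
func logGPUs() {
  for _, g := range GetGPUInfo() {
    slog.Info("discovered compute device",
      "library", g.Library, // "cuda", "rocm", "oneapi" or "cpu"
      "id", g.ID,
      "total", format.HumanBytes2(g.TotalMemory),
      "free", format.HumanBytes2(g.FreeMemory),
    )
  }
}
```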
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
@@ -296,6 +494,7 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
    // Nvidia PhysX known to return bogus results
    if strings.Contains(pattern, "PhysX") {
      slog.Debug("skipping PhysX cuda library path", "path", pattern)
      continue
    }
    // Ignore glob discovery errors
    matches, _ := filepath.Glob(pattern)
@@ -352,7 +551,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
    defer C.free(unsafe.Pointer(lib))
    C.nvcuda_init(lib, &resp)
    if resp.err != nil {
      // Decide what log level based on the type of error message to help users understand why
      msg := C.GoString(resp.err)
      switch resp.cudaErr {
      case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
        slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
      case C.CUDA_ERROR_NO_DEVICE:
        slog.Info("no nvidia devices detected", "library", libPath)
      case C.CUDA_ERROR_UNKNOWN:
        slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
        slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
      default:
        if strings.Contains(msg, "wrong ELF class") {
          slog.Debug("skipping 32bit library", "library", libPath)
        } else {
          slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
        }
      }
      C.free(unsafe.Pointer(resp.err))
    } else {
      return int(resp.num_devices), &resp.ch, libPath
@@ -361,8 +576,26 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
  return 0, nil, ""
}
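The switch above picks a different log severity per failure mode so common, benign cases stay quiet. The sketch below restates that policy as a pure function; the error constants are hypothetical stand-ins for the cgo CUDA_ERROR_* values, not the real codes:

```go
package main

import (
  "fmt"
  "log/slog"
  "strings"
)

// Hypothetical stand-ins for the CUDA driver error codes used above.
const (
  errInsufficientDriver = iota + 1
  errSystemDriverMismatch
  errNoDevice
  errUnknown
)

// initLogLevel sketches the policy: driver mismatches and unknown failures
// warn, "no device" is informational, and 32-bit libraries are skipped quietly.
func initLogLevel(cudaErr int, msg string) slog.Level {
  switch cudaErr {
  case errInsufficientDriver, errSystemDriverMismatch, errUnknown:
    return slog.LevelWarn
  case errNoDevice:
    return slog.LevelInfo
  default:
    if strings.Contains(msg, "wrong ELF class") {
      return slog.LevelDebug
    }
    return slog.LevelInfo
  }
}

func main() {
  fmt.Println(initLogLevel(errNoDevice, ""))                  // INFO
  fmt.Println(initLogLevel(0, "wrong ELF class: ELFCLASS32")) // DEBUG
}
```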
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
  var resp C.nvml_init_resp_t
  resp.ch.verbose = getVerboseState()
  for _, libPath := range nvmlLibPaths {
    lib := C.CString(libPath)
    defer C.free(unsafe.Pointer(lib))
    C.nvml_init(lib, &resp)
    if resp.err != nil {
      slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
      C.free(unsafe.Pointer(resp.err))
    } else {
      return &resp.ch, libPath
    }
  }
  return nil, ""
}
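The OSOverhead bookkeeping in GetGPUInfo above reduces to simple arithmetic: at bootstrap, record how much more free VRAM NVML reports than the CUDA driver does; on each refresh, subtract that overhead from NVML's current figure before scheduling against it. A self-contained sketch of just that arithmetic (the function names and numbers are illustrative, not part of the patch):

```go
package main

import "fmt"

// vramOverhead mirrors the bootstrap-time calculation: any surplus in NVML's
// free figure over the driver's free figure is treated as memory the OS holds.
func vramOverhead(nvmlFree, driverFree uint64) uint64 {
  if nvmlFree > driverFree {
    return nvmlFree - driverFree
  }
  return 0
}

// usableFree mirrors the refresh path: the recorded overhead is subtracted
// from NVML's current free figure (guarding against underflow here).
func usableFree(nvmlFree, overhead uint64) uint64 {
  if overhead > nvmlFree {
    return 0
  }
  return nvmlFree - overhead
}

func main() {
  overhead := vramOverhead(7_800_000_000, 7_500_000_000) // 300 MB held back by the OS
  fmt.Println(overhead, usableFree(7_200_000_000, overhead))
}
```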
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
  var resp C.oneapi_init_resp_t
  num_devices := 0
  resp.oh.verbose = getVerboseState()
  for _, libPath := range oneapiLibPaths {
    lib := C.CString(libPath)
@@ -372,14 +605,17 @@ func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
      slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
      C.free(unsafe.Pointer(resp.err))
    } else {
      for i := range resp.oh.num_drivers {
        num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
      }
      return num_devices, &resp.oh, libPath
    }
  }
  return 0, nil, ""
}

func getVerboseState() C.uint16_t {
  if envconfig.Debug() {
    return C.uint16_t(1)
  }
  return C.uint16_t(0)
...
@@ -8,6 +8,7 @@ package gpu
#include "gpu_info_darwin.h"
*/
import "C"

import (
  "runtime"
@@ -24,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
    return []GpuInfo{
      {
        Library: "cpu",
        Variant: GetCPUCapability(),
        memInfo: mem,
      },
    }
@@ -42,10 +43,22 @@ func GetGPUInfo() GpuInfoList {
  return []GpuInfo{info}
}

func GetCPUInfo() GpuInfoList {
  mem, _ := GetCPUMem()
  return []GpuInfo{
    {
      Library: "cpu",
      Variant: GetCPUCapability(),
      memInfo: mem,
    },
  }
}

func GetCPUMem() (memInfo, error) {
  return memInfo{
    TotalMemory: uint64(C.getPhysicalMemory()),
    FreeMemory:  uint64(C.getFreeMemory()),
    // FreeSwap omitted as Darwin uses dynamic paging
  }, nil
}
...
@@ -47,6 +47,7 @@ typedef struct mem_info {
  char gpu_name[GPU_NAME_LEN];
  uint64_t total;
  uint64_t free;
  uint64_t used;
  // Compute Capability
  int major;
@@ -62,7 +63,8 @@ void cpu_check_ram(mem_info_t *resp);
#include "gpu_info_cudart.h"
#include "gpu_info_nvcuda.h"
#include "gpu_info_nvml.h"
#include "gpu_info_oneapi.h"

#endif  // __GPU_INFO_H__
#endif  // __APPLE__
\ No newline at end of file
@@ -40,7 +40,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
  for (i = 0; l[i].s != NULL; i++) {
    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
    if (!*(l[i].p)) {
      char *msg = LOAD_ERR();
      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
      UNLOAD_LIBRARY(resp->ch.handle);
@@ -94,7 +94,7 @@ void cudart_init(char *cudart_lib_path, cudart_init_resp_t *resp) {
}

void cudart_bootstrap(cudart_handle_t h, int i, mem_info_t *resp) {
  resp->err = NULL;
  cudartMemory_t memInfo = {0,0,0};
  cudartReturn_t ret;
@@ -166,9 +166,11 @@ void cudart_check_vram(cudart_handle_t h, int i, mem_info_t *resp) {
  resp->total = memInfo.total;
  resp->free = memInfo.free;
  resp->used = memInfo.used;

  LOG(h.verbose, "[%s] CUDA totalMem %lu\n", resp->gpu_id, resp->total);
  LOG(h.verbose, "[%s] CUDA freeMem %lu\n", resp->gpu_id, resp->free);
  LOG(h.verbose, "[%s] CUDA usedMem %lu\n", resp->gpu_id, resp->used);
  LOG(h.verbose, "[%s] Compute Capability %d.%d\n", resp->gpu_id, resp->major, resp->minor);
}
...