Commit be330174 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Allow setting max vram for workarounds

Until we get all the memory calculations correct, this can provide
an escape valve for users to work around out-of-memory crashes.
parent ce9f7c46
...@@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) { ...@@ -242,6 +242,15 @@ func getCPUMem() (memInfo, error) {
} }
func CheckVRAM() (int64, error) { func CheckVRAM() (int64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
if err != nil {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return avail, nil
}
gpuInfo := GetGPUInfo() gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") { if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") {
// leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead // leave 10% or 1024MiB of VRAM free per GPU to handle unaccounted for overhead
......
//go:build darwin //go:build darwin
package gpu package gpu
/* /*
#cgo CFLAGS: -x objective-c #cgo CFLAGS: -x objective-c
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
...@@ -8,11 +9,25 @@ package gpu ...@@ -8,11 +9,25 @@ package gpu
*/ */
import "C" import "C"
import ( import (
"fmt"
"log/slog"
"os"
"runtime" "runtime"
"strconv"
) )
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs // CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) { func CheckVRAM() (int64, error) {
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
avail, err := strconv.ParseInt(userLimit, 10, 64)
if err != nil {
return 0, fmt.Errorf("Invalid OLLAMA_MAX_VRAM setting %s: %s", userLimit, err)
}
slog.Info(fmt.Sprintf("user override OLLAMA_MAX_VRAM=%d", avail))
return avail, nil
}
if runtime.GOARCH == "amd64" { if runtime.GOARCH == "amd64" {
// gpu not supported, this may not be metal // gpu not supported, this may not be metal
return 0, nil return 0, nil
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment