llm.go 3.22 KB
Newer Older
1
2
3
package llm

import (
4
	"context"
5
	"fmt"
6
	"log"
7
	"os"
8
	"runtime"
9

10
11
	"github.com/pbnjay/memory"

12
13
14
15
	"github.com/jmorganca/ollama/api"
)

// LLM abstracts a loaded large language model runner. Implementations
// wrap a specific llama.cpp-based runner and expose generation,
// embedding, and tokenization for the lifetime of the loaded model.
type LLM interface {
	// Predict streams a generated response for the given prompt via the
	// callback. The []int argument is presumably the prior context tokens
	// to resume from — TODO confirm against the runner implementation.
	Predict(context.Context, []int, string, func(api.GenerateResponse)) error
	// Embedding returns the embedding vector for the given input text.
	Embedding(context.Context, string) ([]float64, error)
	// Encode tokenizes a string into model token IDs.
	Encode(context.Context, string) ([]int, error)
	// Decode converts model token IDs back into a string.
	Decode(context.Context, []int) (string, error)
	// SetOptions replaces the runtime generation options.
	SetOptions(api.Options)
	// Close releases the underlying runner and its resources.
	Close()
	// Ping reports whether the underlying runner is still responsive.
	Ping(context.Context) error
}

25
func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
26
27
28
29
30
31
32
33
	if _, err := os.Stat(model); err != nil {
		return nil, err
	}

	f, err := os.Open(model)
	if err != nil {
		return nil, err
	}
Michael Yang's avatar
Michael Yang committed
34
	defer f.Close()
35

Bruce MacDonald's avatar
Bruce MacDonald committed
36
	ggml, err := DecodeGGML(f)
37
38
39
40
	if err != nil {
		return nil, err
	}

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
	if runtime.GOOS == "darwin" {
		switch ggml.FileType() {
		case "Q8_0":
			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
				// GGML Q8_0 do not support Metal API and will
				// cause the runner to segmentation fault so disable GPU
				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
				opts.NumGPU = 0
			}
		case "F32", "Q5_0", "Q5_1":
			if opts.NumGPU != 0 {
				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
				// cause the runner to segmentation fault so disable GPU
				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
				opts.NumGPU = 0
			}
57
58
59
		}
	}

60
	totalResidentMemory := memory.TotalMemory()
Michael Yang's avatar
Michael Yang committed
61
	switch ggml.ModelType() {
Michael Yang's avatar
Michael Yang committed
62
63
	case "3B", "7B":
		if ggml.FileType() == "F16" && totalResidentMemory < 16*1024*1024 {
Michael Yang's avatar
Michael Yang committed
64
65
			return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
		} else if totalResidentMemory < 8*1024*1024 {
66
67
			return nil, fmt.Errorf("model requires at least 8GB of memory")
		}
Michael Yang's avatar
Michael Yang committed
68
69
	case "13B":
		if ggml.FileType() == "F16" && totalResidentMemory < 32*1024*1024 {
Michael Yang's avatar
Michael Yang committed
70
71
			return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
		} else if totalResidentMemory < 16*1024*1024 {
72
73
			return nil, fmt.Errorf("model requires at least 16GB of memory")
		}
Michael Yang's avatar
Michael Yang committed
74
75
	case "30B", "34B", "40B":
		if ggml.FileType() == "F16" && totalResidentMemory < 64*1024*1024 {
Michael Yang's avatar
Michael Yang committed
76
77
			return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
		} else if totalResidentMemory < 32*1024*1024 {
78
79
			return nil, fmt.Errorf("model requires at least 32GB of memory")
		}
Michael Yang's avatar
Michael Yang committed
80
81
	case "65B", "70B":
		if ggml.FileType() == "F16" && totalResidentMemory < 128*1024*1024 {
Michael Yang's avatar
Michael Yang committed
82
83
			return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
		} else if totalResidentMemory < 64*1024*1024 {
84
85
			return nil, fmt.Errorf("model requires at least 64GB of memory")
		}
Michael Yang's avatar
Michael Yang committed
86
87
88
89
90
91
	case "180B":
		if ggml.FileType() == "F16" && totalResidentMemory < 512*1024*1024 {
			return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
		} else if totalResidentMemory < 128*1024*1024 {
			return nil, fmt.Errorf("model requires at least 128GB of memory")
		}
92
93
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
94
95
96
	switch ggml.Name() {
	case "gguf":
		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
97
		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
Bruce MacDonald's avatar
Bruce MacDonald committed
98
	case "ggml", "ggmf", "ggjt", "ggla":
99
		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
100
	default:
Michael Yang's avatar
Michael Yang committed
101
		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
102
103
	}
}