llm.go 3.33 KB
Newer Older
1
2
3
package llm

import (
4
	"context"
5
	"fmt"
6
	"log"
7
	"os"
8
	"runtime"
9

10
11
	"github.com/pbnjay/memory"

12
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
13
	"github.com/jmorganca/ollama/format"
14
15
16
)

// LLM is the interface implemented by loaded model runners (see New
// below, which selects a concrete implementation from the model file).
type LLM interface {
	// Predict generates text for the given prompt, delivering results
	// incrementally through the callback. The []int argument appears to
	// carry prior context tokens — confirm against callers.
	Predict(context.Context, []int, string, func(api.GenerateResponse)) error
	// Embedding returns an embedding vector for the given input text.
	Embedding(context.Context, string) ([]float64, error)
	// Encode tokenizes a string into model token IDs.
	Encode(context.Context, string) ([]int, error)
	// Decode converts token IDs back into a string.
	Decode(context.Context, []int) (string, error)
	// SetOptions replaces the runner's generation options.
	SetOptions(api.Options)
	// Close releases resources held by the runner.
	Close()
	// Ping reports whether the underlying runner is still responsive.
	Ping(context.Context) error
}

26
func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
27
28
29
30
31
32
33
34
	if _, err := os.Stat(model); err != nil {
		return nil, err
	}

	f, err := os.Open(model)
	if err != nil {
		return nil, err
	}
Michael Yang's avatar
Michael Yang committed
35
	defer f.Close()
36

Bruce MacDonald's avatar
Bruce MacDonald committed
37
	ggml, err := DecodeGGML(f)
38
39
40
41
	if err != nil {
		return nil, err
	}

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
	if runtime.GOOS == "darwin" {
		switch ggml.FileType() {
		case "Q8_0":
			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
				// GGML Q8_0 do not support Metal API and will
				// cause the runner to segmentation fault so disable GPU
				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
				opts.NumGPU = 0
			}
		case "F32", "Q5_0", "Q5_1":
			if opts.NumGPU != 0 {
				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
				// cause the runner to segmentation fault so disable GPU
				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
				opts.NumGPU = 0
			}
58
59
60
		}
	}

61
	totalResidentMemory := memory.TotalMemory()
Michael Yang's avatar
Michael Yang committed
62
	switch ggml.ModelType() {
Michael Yang's avatar
Michael Yang committed
63
	case "3B", "7B":
Michael Yang's avatar
Michael Yang committed
64
		if ggml.FileType() == "F16" && totalResidentMemory < 16*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
65
			return nil, fmt.Errorf("F16 model requires at least 16 GB of memory")
Michael Yang's avatar
Michael Yang committed
66
		} else if totalResidentMemory < 8*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
67
			return nil, fmt.Errorf("model requires at least 8 GB of memory")
68
		}
Michael Yang's avatar
Michael Yang committed
69
	case "13B":
Michael Yang's avatar
Michael Yang committed
70
		if ggml.FileType() == "F16" && totalResidentMemory < 32*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
71
			return nil, fmt.Errorf("F16 model requires at least 32 GB of memory")
Michael Yang's avatar
Michael Yang committed
72
		} else if totalResidentMemory < 16*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
73
			return nil, fmt.Errorf("model requires at least 16 GB of memory")
74
		}
Michael Yang's avatar
Michael Yang committed
75
	case "30B", "34B", "40B":
Michael Yang's avatar
Michael Yang committed
76
		if ggml.FileType() == "F16" && totalResidentMemory < 64*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
77
			return nil, fmt.Errorf("F16 model requires at least 64 GB of memory")
Michael Yang's avatar
Michael Yang committed
78
		} else if totalResidentMemory < 32*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
79
			return nil, fmt.Errorf("model requires at least 32 GB of memory")
80
		}
Michael Yang's avatar
Michael Yang committed
81
	case "65B", "70B":
Michael Yang's avatar
Michael Yang committed
82
		if ggml.FileType() == "F16" && totalResidentMemory < 128*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
83
			return nil, fmt.Errorf("F16 model requires at least 128 GB of memory")
Michael Yang's avatar
Michael Yang committed
84
		} else if totalResidentMemory < 64*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
85
			return nil, fmt.Errorf("model requires at least 64 GB of memory")
86
		}
Michael Yang's avatar
Michael Yang committed
87
	case "180B":
Michael Yang's avatar
Michael Yang committed
88
		if ggml.FileType() == "F16" && totalResidentMemory < 512*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
89
			return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
Michael Yang's avatar
Michael Yang committed
90
		} else if totalResidentMemory < 128*format.GigaByte {
Michael Yang's avatar
Michael Yang committed
91
92
			return nil, fmt.Errorf("model requires at least 128GB of memory")
		}
93
94
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
95
96
97
	switch ggml.Name() {
	case "gguf":
		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
98
		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
Bruce MacDonald's avatar
Bruce MacDonald committed
99
	case "ggml", "ggmf", "ggjt", "ggla":
100
		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
101
	default:
Michael Yang's avatar
Michael Yang committed
102
		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
103
104
	}
}