llm.go 1.95 KB
Newer Older
1
2
package llm

3
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
4
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
5
6
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
7
8
// #cgo windows,amd64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src
// #cgo windows,arm64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src
9
10
// #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
// #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
Michael Yang's avatar
Michael Yang committed
11
// #include <stdlib.h>
12
13
// #include "llama.h"
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
14

Michael Yang's avatar
Michael Yang committed
15
import (
Michael Yang's avatar
lint  
Michael Yang committed
16
	"errors"
Michael Yang's avatar
Michael Yang committed
17
18
	"unsafe"
)
19
20
21
22

// SystemInfo returns the string produced by llama.cpp's
// llama_print_system_info, converted to a Go string. It is currently unused
// and exists as a minimal example of calling a llama.cpp function through cgo.
func SystemInfo() string {
	info := C.llama_print_system_info()
	return C.GoString(info)
}
Michael Yang's avatar
Michael Yang committed
24

Michael Yang's avatar
Michael Yang committed
25
func Quantize(infile, outfile string, ftype fileType) error {
Michael Yang's avatar
Michael Yang committed
26
27
28
29
30
31
32
33
	cinfile := C.CString(infile)
	defer C.free(unsafe.Pointer(cinfile))

	coutfile := C.CString(outfile)
	defer C.free(unsafe.Pointer(coutfile))

	params := C.llama_model_quantize_default_params()
	params.nthread = -1
Michael Yang's avatar
Michael Yang committed
34
	params.ftype = ftype.Value()
Michael Yang's avatar
Michael Yang committed
35

Michael Yang's avatar
Michael Yang committed
36
	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
Michael Yang's avatar
lint  
Michael Yang committed
37
		return errors.New("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
Michael Yang's avatar
Michael Yang committed
38
39
40
41
	}

	return nil
}