llm.go 1.95 KB
Newer Older
1
2
package llm

3
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
4
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
5
6
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
7
8
// #cgo windows,amd64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/amd64_static -L${SRCDIR}/build/windows/amd64_static/src -L${SRCDIR}/build/windows/amd64_static/ggml/src
// #cgo windows,arm64 LDFLAGS: -static-libstdc++ -static-libgcc -static -L${SRCDIR}/build/windows/arm64_static -L${SRCDIR}/build/windows/arm64_static/src -L${SRCDIR}/build/windows/arm64_static/ggml/src
9
10
// #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
// #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
Michael Yang's avatar
Michael Yang committed
11
// #include <stdlib.h>
12
13
// #include "llama.h"
import "C"
Michael Yang's avatar
Michael Yang committed
14
15
16
17
import (
	"fmt"
	"unsafe"
)
18
19
20
21

// SystemInfo returns the string produced by llama.cpp's
// llama_print_system_info, converted to a Go string. It is an unused example
// of calling llama.cpp functions through cgo.
func SystemInfo() string {
	info := C.llama_print_system_info()
	return C.GoString(info)
}
Michael Yang's avatar
Michael Yang committed
23

Michael Yang's avatar
Michael Yang committed
24
func Quantize(infile, outfile string, ftype fileType) error {
Michael Yang's avatar
Michael Yang committed
25
26
27
28
29
30
31
32
	cinfile := C.CString(infile)
	defer C.free(unsafe.Pointer(cinfile))

	coutfile := C.CString(outfile)
	defer C.free(unsafe.Pointer(coutfile))

	params := C.llama_model_quantize_default_params()
	params.nthread = -1
Michael Yang's avatar
Michael Yang committed
33
	params.ftype = ftype.Value()
Michael Yang's avatar
Michael Yang committed
34

Michael Yang's avatar
Michael Yang committed
35
	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
Josh's avatar
Josh committed
36
		return fmt.Errorf("failed to quantize model. This model architecture may not be supported, or you may need to upgrade Ollama to the latest version")
Michael Yang's avatar
Michael Yang committed
37
38
39
40
	}

	return nil
}