package llm

3
4
5
6
// #cgo CFLAGS: -Illama.cpp
// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
// #include <stdlib.h>
// #include "llama.h"
import "C"

import (
	"fmt"
	"strings"
	"unsafe"
)
// SystemInfo returns llama.cpp's system-info string. It exists mainly
// as a smoke test that the CGo linkage against llama.cpp works.
func SystemInfo() string {
	info := C.llama_print_system_info()
	return C.GoString(info)
}
func Quantize(infile, outfile string, ftype fileType) error {
Michael Yang's avatar
Michael Yang committed
25
26
27
28
29
30
31
32
	cinfile := C.CString(infile)
	defer C.free(unsafe.Pointer(cinfile))

	coutfile := C.CString(outfile)
	defer C.free(unsafe.Pointer(coutfile))

	params := C.llama_model_quantize_default_params()
	params.nthread = -1
Michael Yang's avatar
Michael Yang committed
33
	params.ftype = ftype.Value()
Michael Yang's avatar
Michael Yang committed
34

Michael Yang's avatar
Michael Yang committed
35
36
	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
		return fmt.Errorf("llama_model_quantize: %d", rc)
Michael Yang's avatar
Michael Yang committed
37
38
39
40
	}

	return nil
}
// llamaModel wraps a raw llama.cpp model handle. The handle is owned
// by the wrapper and must be released with Close when no longer needed.
type llamaModel struct {
	m *C.struct_llama_model
}

// newLlamaModel loads the model file at path p with llama.cpp's default
// model parameters and wraps the resulting handle.
// NOTE(review): llama_load_model_from_file can return NULL on failure;
// callers appear to assume success — confirm before relying on it.
func newLlamaModel(p string) *llamaModel {
	path := C.CString(p)
	defer C.free(unsafe.Pointer(path))

	params := C.llama_model_default_params()
	return &llamaModel{m: C.llama_load_model_from_file(path, params)}
}

// Close frees the underlying llama.cpp model. The receiver must not be
// used after Close returns.
func (llm *llamaModel) Close() {
	C.llama_free_model(llm.m)
}

// Tokenize converts s into llama.cpp token IDs. It returns nil when
// tokenization fails or produces no tokens.
//
// Bug fix: the previous version passed &tokens[0] of an []int to
// llama_tokenize as a *C.llama_token. llama_token is a 32-bit C int,
// so llama.cpp packs 4-byte tokens into a buffer whose Go elements are
// 8 bytes on 64-bit platforms — every element read back was garbage.
// Tokenize into a []C.llama_token and widen to Go ints afterwards.
func (llm *llamaModel) Tokenize(s string) []int {
	cs := C.CString(s)
	defer C.free(unsafe.Pointer(cs))

	// len(s)+2 leaves room for special tokens, matching the limit
	// passed to llama_tokenize below.
	buf := make([]C.llama_token, len(s)+2)
	n := C.llama_tokenize(llm.m, cs, C.int(len(s)), &buf[0], C.int(len(buf)), false, true)
	if n <= 0 {
		// n < 0 means the buffer was too small; n == 0 means no tokens.
		return nil
	}

	tokens := make([]int, n)
	for i, t := range buf[:n] {
		tokens[i] = int(t)
	}

	return tokens
}

// Detokenize converts a sequence of token IDs back into a string by
// concatenating each token's text piece. Tokens whose piece cannot be
// rendered (llama_token_to_piece returns <= 0) are silently skipped,
// preserving the original best-effort behavior.
//
// Perf fix: the 512-byte scratch buffer was previously allocated on
// every loop iteration; it is now allocated once and reused, which is
// safe because WriteString copies the bytes out immediately.
func (llm *llamaModel) Detokenize(i32s []int) string {
	var sb strings.Builder
	buf := make([]byte, 512) // scratch reused across tokens
	for _, i32 := range i32s {
		if n := C.llama_token_to_piece(llm.m, C.llama_token(i32), (*C.char)(unsafe.Pointer(&buf[0])), C.int(len(buf)), false); n > 0 {
			sb.WriteString(unsafe.String(&buf[0], n))
		}
	}

	return sb.String()
}