"tests/vscode:/vscode.git/clone" did not exist on "5a2b77a6c1dc54372d0569c6a681c69895eab904"
Commit 95005046 authored by wangkx1's avatar wangkx1
Browse files

add all ollama

parent 22cb4ffc
Pipeline #1533 failed with stages
in 0 seconds
//go:build windows
package wintray
import (
"unsafe"
"golang.org/x/sys/windows"
)
// Contains window class information.
// It is used with the RegisterClassEx and GetClassInfoEx functions.
// https://msdn.microsoft.com/en-us/library/ms633577.aspx
type wndClassEx struct {
Size, Style uint32
WndProc uintptr
ClsExtra, WndExtra int32
Instance, Icon, Cursor, Background windows.Handle
MenuName, ClassName *uint16
IconSm windows.Handle
}
// Registers a window class for subsequent use in calls to the CreateWindow or CreateWindowEx function.
// https://msdn.microsoft.com/en-us/library/ms633587.aspx
func (w *wndClassEx) register() error {
w.Size = uint32(unsafe.Sizeof(*w))
res, _, err := pRegisterClass.Call(uintptr(unsafe.Pointer(w)))
if res == 0 {
return err
}
return nil
}
// Unregisters a window class, freeing the memory required for the class.
// https://msdn.microsoft.com/en-us/library/ms644899.aspx
func (w *wndClassEx) unregister() error {
res, _, err := pUnregisterClass.Call(
uintptr(unsafe.Pointer(w.ClassName)),
uintptr(w.Instance),
)
if res == 0 {
return err
}
return nil
}
package auth
import (
"bytes"
"context"
"crypto/rand"
"encoding/base64"
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"golang.org/x/crypto/ssh"
)
const defaultPrivateKey = "id_ed25519"
func keyPath() (string, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
}
func GetPublicKey() (string, error) {
keyPath, err := keyPath()
if err != nil {
return "", err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
return strings.TrimSpace(string(publicKey)), nil
}
func NewNonce(r io.Reader, length int) (string, error) {
nonce := make([]byte, length)
if _, err := io.ReadFull(r, nonce); err != nil {
return "", err
}
return base64.RawURLEncoding.EncodeToString(nonce), nil
}
func Sign(ctx context.Context, bts []byte) (string, error) {
keyPath, err := keyPath()
if err != nil {
return "", err
}
privateKeyFile, err := os.ReadFile(keyPath)
if err != nil {
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
return "", err
}
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
if err != nil {
return "", err
}
// get the pubkey, but remove the type
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
parts := bytes.Split(publicKey, []byte(" "))
if len(parts) < 2 {
return "", errors.New("malformed public key")
}
signedData, err := privateKey.Sign(rand.Reader, bts)
if err != nil {
return "", err
}
// signature is <pubkey>:<signature>
return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
}
package cmd
import (
"archive/zip"
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/pem"
"errors"
"fmt"
"io"
"log"
"math"
"net"
"net/http"
"os"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"slices"
"strings"
"syscall"
"time"
"github.com/containerd/console"
"github.com/mattn/go-runewidth"
"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/term"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/server"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
func CreateHandler(cmd *cobra.Command, args []string) error {
filename, _ := cmd.Flags().GetString("file")
filename, err := filepath.Abs(filename)
if err != nil {
return err
}
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
p := progress.NewProgress(os.Stderr)
defer p.Stop()
f, err := os.Open(filename)
if err != nil {
return err
}
defer f.Close()
modelfile, err := parser.ParseFile(f)
if err != nil {
return err
}
home, err := os.UserHomeDir()
if err != nil {
return err
}
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
for i := range modelfile.Commands {
switch modelfile.Commands[i].Name {
case "model", "adapter":
path := modelfile.Commands[i].Args
if path == "~" {
path = home
} else if strings.HasPrefix(path, "~/") {
path = filepath.Join(home, path[2:])
}
if !filepath.IsAbs(path) {
path = filepath.Join(filepath.Dir(filename), path)
}
fi, err := os.Stat(path)
if errors.Is(err, os.ErrNotExist) && modelfile.Commands[i].Name == "model" {
continue
} else if err != nil {
return err
}
if fi.IsDir() {
// this is likely a safetensors or pytorch directory
// TODO make this work w/ adapters
tempfile, err := tempZipFiles(path)
if err != nil {
return err
}
defer os.RemoveAll(tempfile)
path = tempfile
}
digest, err := createBlob(cmd, client, path)
if err != nil {
return err
}
modelfile.Commands[i].Args = "@" + digest
}
}
bars := make(map[string]*progress.Bar)
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
spinner.Stop()
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
spinner.Stop()
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
quantize, _ := cmd.Flags().GetString("quantize")
request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize}
if err := client.Create(cmd.Context(), &request, fn); err != nil {
return err
}
return nil
}
func tempZipFiles(path string) (string, error) {
tempfile, err := os.CreateTemp("", "ollama-tf")
if err != nil {
return "", err
}
defer tempfile.Close()
detectContentType := func(path string) (string, error) {
f, err := os.Open(path)
if err != nil {
return "", err
}
defer f.Close()
var b bytes.Buffer
b.Grow(512)
if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
return "", err
}
contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
return contentType, nil
}
glob := func(pattern, contentType string) ([]string, error) {
matches, err := filepath.Glob(pattern)
if err != nil {
return nil, err
}
for _, safetensor := range matches {
if ct, err := detectContentType(safetensor); err != nil {
return nil, err
} else if ct != contentType {
return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
}
}
return matches, nil
}
var files []string
if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
// safetensors files might be unresolved git lfs references; skip if they are
// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
files = append(files, st...)
} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
files = append(files, pt...)
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers consolidated.x.pth, consolidated.pth
files = append(files, pt...)
} else {
return "", errors.New("no safetensors or torch files found")
}
// add configuration files, json files are detected as text/plain
js, err := glob(filepath.Join(path, "*.json"), "text/plain")
if err != nil {
return "", err
}
files = append(files, js...)
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
}
zipfile := zip.NewWriter(tempfile)
defer zipfile.Close()
for _, file := range files {
f, err := os.Open(file)
if err != nil {
return "", err
}
defer f.Close()
fi, err := f.Stat()
if err != nil {
return "", err
}
zfi, err := zip.FileInfoHeader(fi)
if err != nil {
return "", err
}
zf, err := zipfile.CreateHeader(zfi)
if err != nil {
return "", err
}
if _, err := io.Copy(zf, f); err != nil {
return "", err
}
}
return tempfile.Name(), nil
}
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {
return "", err
}
defer bin.Close()
hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil {
return "", err
}
if _, err := bin.Seek(0, io.SeekStart); err != nil {
return "", err
}
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return "", err
}
return digest, nil
}
func RunHandler(cmd *cobra.Command, args []string) error {
interactive := true
opts := runOptions{
Model: args[0],
WordWrap: os.Getenv("TERM") == "xterm-256color",
Options: map[string]interface{}{},
}
format, err := cmd.Flags().GetString("format")
if err != nil {
return err
}
opts.Format = format
keepAlive, err := cmd.Flags().GetString("keepalive")
if err != nil {
return err
}
if keepAlive != "" {
d, err := time.ParseDuration(keepAlive)
if err != nil {
return err
}
opts.KeepAlive = &api.Duration{Duration: d}
}
prompts := args[1:]
// prepend stdin to the prompt if provided
if !term.IsTerminal(int(os.Stdin.Fd())) {
in, err := io.ReadAll(os.Stdin)
if err != nil {
return err
}
prompts = append([]string{string(in)}, prompts...)
opts.WordWrap = false
interactive = false
}
opts.Prompt = strings.Join(prompts, " ")
if len(prompts) > 0 {
interactive = false
}
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
opts.WordWrap = !nowrap
// Fill out the rest of the options based on information about the
// model.
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
name := args[0]
info, err := func() (*api.ShowResponse, error) {
showReq := &api.ShowRequest{Name: name}
info, err := client.Show(cmd.Context(), showReq)
var se api.StatusError
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
if err := PullHandler(cmd, []string{name}); err != nil {
return nil, err
}
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
}
return info, err
}()
if err != nil {
return err
}
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
opts.ParentModel = info.Details.ParentModel
if interactive {
if err := loadModel(cmd, &opts); err != nil {
return err
}
for _, msg := range info.Messages {
switch msg.Role {
case "user":
fmt.Printf(">>> %s\n", msg.Content)
case "assistant":
state := &displayResponseState{}
displayResponse(msg.Content, opts.WordWrap, state)
fmt.Println()
fmt.Println()
}
}
return generateInteractive(cmd, opts)
}
return generate(cmd, opts)
}
func errFromUnknownKey(unknownKeyErr error) error {
// find SSH public key in the error message
sshKeyPattern := `ssh-\w+ [^\s"]+`
re := regexp.MustCompile(sshKeyPattern)
matches := re.FindStringSubmatch(unknownKeyErr.Error())
if len(matches) > 0 {
serverPubKey := matches[0]
localPubKey, err := auth.GetPublicKey()
if err != nil {
return unknownKeyErr
}
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
// try the ollama service public key
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
if err != nil {
return unknownKeyErr
}
localPubKey = strings.TrimSpace(string(svcPubKey))
}
// check if the returned public key matches the local public key, this prevents adding a remote key to the user's account
if serverPubKey != localPubKey {
return unknownKeyErr
}
var msg strings.Builder
msg.WriteString(unknownKeyErr.Error())
msg.WriteString("\n\nYour ollama key is:\n")
msg.WriteString(localPubKey)
msg.WriteString("\nAdd your key at:\n")
msg.WriteString("https://ollama.com/settings/keys")
return errors.New(msg.String())
}
return unknownKeyErr
}
func PushHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
insecure, err := cmd.Flags().GetBool("insecure")
if err != nil {
return err
}
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(cmd.Context(), &request, fn); err != nil {
if spinner != nil {
spinner.Stop()
}
if strings.Contains(err.Error(), "access denied") {
return errors.New("you are not authorized to push to this namespace, create the model under a namespace you own")
}
host := model.ParseName(args[0]).Host
isOllamaHost := strings.HasSuffix(host, ".ollama.ai") || strings.HasSuffix(host, ".ollama.com")
if strings.Contains(err.Error(), errtypes.UnknownOllamaKeyErrMsg) && isOllamaHost {
// the user has not added their ollama key to ollama.com
// re-throw an error with a more user-friendly message
return errFromUnknownKey(err)
}
return err
}
spinner.Stop()
return nil
}
func ListHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
models, err := client.List(cmd.Context())
if err != nil {
return err
}
var data [][]string
for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), format.HumanTime(m.ModifiedAt, "Never")})
}
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "MODIFIED"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.AppendBulk(data)
table.Render()
return nil
}
func ListRunningHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
models, err := client.ListRunning(cmd.Context())
if err != nil {
return err
}
var data [][]string
for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
var procStr string
switch {
case m.SizeVRAM == 0:
procStr = "100% CPU"
case m.SizeVRAM == m.Size:
procStr = "100% GPU"
case m.SizeVRAM > m.Size || m.Size == 0:
procStr = "Unknown"
default:
sizeCPU := m.Size - m.SizeVRAM
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
}
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
}
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.AppendBulk(data)
table.Render()
return nil
}
func DeleteHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
for _, name := range args {
req := api.DeleteRequest{Name: name}
if err := client.Delete(cmd.Context(), &req); err != nil {
return err
}
fmt.Printf("deleted '%s'\n", name)
}
return nil
}
func ShowHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
license, errLicense := cmd.Flags().GetBool("license")
modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
parameters, errParams := cmd.Flags().GetBool("parameters")
system, errSystem := cmd.Flags().GetBool("system")
template, errTemplate := cmd.Flags().GetBool("template")
for _, boolErr := range []error{errLicense, errModelfile, errParams, errSystem, errTemplate} {
if boolErr != nil {
return errors.New("error retrieving flags")
}
}
flagsSet := 0
showType := ""
if license {
flagsSet++
showType = "license"
}
if modelfile {
flagsSet++
showType = "modelfile"
}
if parameters {
flagsSet++
showType = "parameters"
}
if system {
flagsSet++
showType = "system"
}
if template {
flagsSet++
showType = "template"
}
if flagsSet > 1 {
return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
}
req := api.ShowRequest{Name: args[0]}
resp, err := client.Show(cmd.Context(), &req)
if err != nil {
return err
}
if flagsSet == 1 {
switch showType {
case "license":
fmt.Println(resp.License)
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println(resp.Parameters)
case "system":
fmt.Println(resp.System)
case "template":
fmt.Println(resp.Template)
}
return nil
}
showInfo(resp)
return nil
}
func showInfo(resp *api.ShowResponse) {
arch := resp.ModelInfo["general.architecture"].(string)
modelData := [][]string{
{"arch", arch},
{"parameters", resp.Details.ParameterSize},
{"quantization", resp.Details.QuantizationLevel},
{"context length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))},
{"embedding length", fmt.Sprintf("%v", resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64))},
}
mainTableData := [][]string{
{"Model"},
{renderSubTable(modelData, false)},
}
if resp.ProjectorInfo != nil {
projectorData := [][]string{
{"arch", "clip"},
{"parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))},
}
if projectorType, ok := resp.ProjectorInfo["clip.projector_type"]; ok {
projectorData = append(projectorData, []string{"projector type", projectorType.(string)})
}
projectorData = append(projectorData,
[]string{"embedding length", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.embedding_length"].(float64))},
[]string{"projection dimensionality", fmt.Sprintf("%v", resp.ProjectorInfo["clip.vision.projection_dim"].(float64))},
)
mainTableData = append(mainTableData,
[]string{"Projector"},
[]string{renderSubTable(projectorData, false)},
)
}
if resp.Parameters != "" {
mainTableData = append(mainTableData, []string{"Parameters"}, []string{formatParams(resp.Parameters)})
}
if resp.System != "" {
mainTableData = append(mainTableData, []string{"System"}, []string{renderSubTable(twoLines(resp.System), true)})
}
if resp.License != "" {
mainTableData = append(mainTableData, []string{"License"}, []string{renderSubTable(twoLines(resp.License), true)})
}
table := tablewriter.NewWriter(os.Stdout)
table.SetAutoWrapText(false)
table.SetBorder(false)
table.SetAlignment(tablewriter.ALIGN_LEFT)
for _, v := range mainTableData {
table.Append(v)
}
table.Render()
}
func renderSubTable(data [][]string, file bool) string {
var buf bytes.Buffer
table := tablewriter.NewWriter(&buf)
table.SetAutoWrapText(!file)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.SetAlignment(tablewriter.ALIGN_LEFT)
for _, v := range data {
table.Append(v)
}
table.Render()
renderedTable := buf.String()
lines := strings.Split(renderedTable, "\n")
for i, line := range lines {
lines[i] = "\t" + line
}
return strings.Join(lines, "\n")
}
func twoLines(s string) [][]string {
lines := strings.Split(s, "\n")
res := [][]string{}
count := 0
for _, line := range lines {
line = strings.TrimSpace(line)
if line != "" {
count++
res = append(res, []string{line})
if count == 2 {
return res
}
}
}
return res
}
func formatParams(s string) string {
lines := strings.Split(s, "\n")
table := [][]string{}
for _, line := range lines {
table = append(table, strings.Fields(line))
}
return renderSubTable(table, false)
}
func CopyHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
req := api.CopyRequest{Source: args[0], Destination: args[1]}
if err := client.Copy(cmd.Context(), &req); err != nil {
return err
}
fmt.Printf("copied '%s' to '%s'\n", args[0], args[1])
return nil
}
func PullHandler(cmd *cobra.Command, args []string) error {
insecure, err := cmd.Flags().GetBool("insecure")
if err != nil {
return err
}
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PullRequest{Name: args[0], Insecure: insecure}
if err := client.Pull(cmd.Context(), &request, fn); err != nil {
return err
}
return nil
}
type generateContextKey string
type runOptions struct {
Model string
ParentModel string
Prompt string
Messages []api.Message
WordWrap bool
Format string
System string
Images []api.ImageData
Options map[string]interface{}
MultiModal bool
KeepAlive *api.Duration
}
type displayResponseState struct {
lineLength int
wordBuffer string
}
func displayResponse(content string, wordWrap bool, state *displayResponseState) {
termWidth, _, _ := term.GetSize(int(os.Stdout.Fd()))
if wordWrap && termWidth >= 10 {
for _, ch := range content {
if state.lineLength+1 > termWidth-5 {
if runewidth.StringWidth(state.wordBuffer) > termWidth-10 {
fmt.Printf("%s%c", state.wordBuffer, ch)
state.wordBuffer = ""
state.lineLength = 0
continue
}
// backtrack the length of the last word and clear to the end of the line
a := runewidth.StringWidth(state.wordBuffer)
if a > 0 {
fmt.Printf("\x1b[%dD", a)
}
fmt.Printf("\x1b[K\n")
fmt.Printf("%s%c", state.wordBuffer, ch)
chWidth := runewidth.RuneWidth(ch)
state.lineLength = runewidth.StringWidth(state.wordBuffer) + chWidth
} else {
fmt.Print(string(ch))
state.lineLength += runewidth.RuneWidth(ch)
if runewidth.RuneWidth(ch) >= 2 {
state.wordBuffer = ""
continue
}
switch ch {
case ' ':
state.wordBuffer = ""
case '\n':
state.lineLength = 0
default:
state.wordBuffer += string(ch)
}
}
}
} else {
fmt.Printf("%s%s", state.wordBuffer, content)
if len(state.wordBuffer) > 0 {
state.wordBuffer = ""
}
}
}
func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
client, err := api.ClientFromEnvironment()
if err != nil {
return nil, err
}
p := progress.NewProgress(os.Stderr)
defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
cancelCtx, cancel := context.WithCancel(cmd.Context())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
cancel()
}()
var state *displayResponseState = &displayResponseState{}
var latest api.ChatResponse
var fullResponse strings.Builder
var role string
fn := func(response api.ChatResponse) error {
p.StopAndClear()
latest = response
role = response.Message.Role
content := response.Message.Content
fullResponse.WriteString(content)
displayResponse(content, opts.WordWrap, state)
return nil
}
req := &api.ChatRequest{
Model: opts.Model,
Messages: opts.Messages,
Format: opts.Format,
Options: opts.Options,
}
if opts.KeepAlive != nil {
req.KeepAlive = opts.KeepAlive
}
if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil, nil
}
return nil, err
}
if len(opts.Messages) > 0 {
fmt.Println()
fmt.Println()
}
verbose, err := cmd.Flags().GetBool("verbose")
if err != nil {
return nil, err
}
if verbose {
latest.Summary()
}
return &api.Message{Role: role, Content: fullResponse.String()}, nil
}
func generate(cmd *cobra.Command, opts runOptions) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
p := progress.NewProgress(os.Stderr)
defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
var latest api.GenerateResponse
generateContext, ok := cmd.Context().Value(generateContextKey("context")).([]int)
if !ok {
generateContext = []int{}
}
ctx, cancel := context.WithCancel(cmd.Context())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
cancel()
}()
var state *displayResponseState = &displayResponseState{}
fn := func(response api.GenerateResponse) error {
p.StopAndClear()
latest = response
content := response.Response
displayResponse(content, opts.WordWrap, state)
return nil
}
if opts.MultiModal {
opts.Prompt, opts.Images, err = extractFileData(opts.Prompt)
if err != nil {
return err
}
}
request := api.GenerateRequest{
Model: opts.Model,
Prompt: opts.Prompt,
Context: generateContext,
Images: opts.Images,
Format: opts.Format,
System: opts.System,
Options: opts.Options,
KeepAlive: opts.KeepAlive,
}
if err := client.Generate(ctx, &request, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil
}
return err
}
if opts.Prompt != "" {
fmt.Println()
fmt.Println()
}
if !latest.Done {
return nil
}
verbose, err := cmd.Flags().GetBool("verbose")
if err != nil {
return err
}
if verbose {
latest.Summary()
}
ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
cmd.SetContext(ctx)
return nil
}
func RunServer(cmd *cobra.Command, _ []string) error {
if err := initializeKeypair(); err != nil {
return err
}
ln, err := net.Listen("tcp", envconfig.Host().Host)
if err != nil {
return err
}
err = server.Serve(ln)
if errors.Is(err, http.ErrServerClosed) {
return nil
}
return err
}
func initializeKeypair() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
privKeyPath := filepath.Join(home, ".ollama", "id_ed25519")
pubKeyPath := filepath.Join(home, ".ollama", "id_ed25519.pub")
_, err = os.Stat(privKeyPath)
if os.IsNotExist(err) {
fmt.Printf("Couldn't find '%s'. Generating new private key.\n", privKeyPath)
cryptoPublicKey, cryptoPrivateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return err
}
privateKeyBytes, err := ssh.MarshalPrivateKey(cryptoPrivateKey, "")
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(privKeyPath), 0o755); err != nil {
return fmt.Errorf("could not create directory %w", err)
}
if err := os.WriteFile(privKeyPath, pem.EncodeToMemory(privateKeyBytes), 0o600); err != nil {
return err
}
sshPublicKey, err := ssh.NewPublicKey(cryptoPublicKey)
if err != nil {
return err
}
publicKeyBytes := ssh.MarshalAuthorizedKey(sshPublicKey)
if err := os.WriteFile(pubKeyPath, publicKeyBytes, 0o644); err != nil {
return err
}
fmt.Printf("Your new public key is: \n\n%s\n", publicKeyBytes)
}
return nil
}
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
if err := client.Heartbeat(cmd.Context()); err != nil {
if !strings.Contains(err.Error(), " refused") {
return err
}
if err := startApp(cmd.Context(), client); err != nil {
return errors.New("could not connect to ollama app, is it running?")
}
}
return nil
}
func versionHandler(cmd *cobra.Command, _ []string) {
client, err := api.ClientFromEnvironment()
if err != nil {
return
}
serverVersion, err := client.Version(cmd.Context())
if err != nil {
fmt.Println("Warning: could not connect to a running Ollama instance")
}
if serverVersion != "" {
fmt.Printf("ollama version is %s\n", serverVersion)
}
if serverVersion != version.Version {
fmt.Printf("Warning: client version is %s\n", version.Version)
}
}
func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
if len(envs) == 0 {
return
}
envUsage := `
Environment Variables:
`
for _, e := range envs {
envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
}
cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
}
func NewCLI() *cobra.Command {
log.SetFlags(log.LstdFlags | log.Lshortfile)
cobra.EnableCommandSorting = false
if runtime.GOOS == "windows" {
console.ConsoleFromFile(os.Stdin) //nolint:errcheck
}
rootCmd := &cobra.Command{
Use: "ollama",
Short: "Large language model runner",
SilenceUsage: true,
SilenceErrors: true,
CompletionOptions: cobra.CompletionOptions{
DisableDefaultCmd: true,
},
Run: func(cmd *cobra.Command, args []string) {
if version, _ := cmd.Flags().GetBool("version"); version {
versionHandler(cmd, args)
return
}
cmd.Print(cmd.UsageString())
},
}
rootCmd.Flags().BoolP("version", "v", false, "Show version information")
createCmd := &cobra.Command{
Use: "create MODEL",
Short: "Create a model from a Modelfile",
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: CreateHandler,
}
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
showCmd := &cobra.Command{
Use: "show MODEL",
Short: "Show information for a model",
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: ShowHandler,
}
showCmd.Flags().Bool("license", false, "Show license of a model")
showCmd.Flags().Bool("modelfile", false, "Show Modelfile of a model")
showCmd.Flags().Bool("parameters", false, "Show parameters of a model")
showCmd.Flags().Bool("template", false, "Show template of a model")
showCmd.Flags().Bool("system", false, "Show system message of a model")
runCmd := &cobra.Command{
Use: "run MODEL [PROMPT]",
Short: "Run a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,
RunE: RunHandler,
}
runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
runCmd.Flags().String("format", "", "Response format (e.g. json)")
serveCmd := &cobra.Command{
Use: "serve",
Aliases: []string{"start"},
Short: "Start ollama",
Args: cobra.ExactArgs(0),
RunE: RunServer,
}
pullCmd := &cobra.Command{
Use: "pull MODEL",
Short: "Pull a model from a registry",
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PullHandler,
}
pullCmd.Flags().Bool("insecure", false, "Use an insecure registry")
pushCmd := &cobra.Command{
Use: "push MODEL",
Short: "Push a model to a registry",
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PushHandler,
}
pushCmd.Flags().Bool("insecure", false, "Use an insecure registry")
listCmd := &cobra.Command{
Use: "list",
Aliases: []string{"ls"},
Short: "List models",
PreRunE: checkServerHeartbeat,
RunE: ListHandler,
}
psCmd := &cobra.Command{
Use: "ps",
Short: "List running models",
PreRunE: checkServerHeartbeat,
RunE: ListRunningHandler,
}
copyCmd := &cobra.Command{
Use: "cp SOURCE DESTINATION",
Short: "Copy a model",
Args: cobra.ExactArgs(2),
PreRunE: checkServerHeartbeat,
RunE: CopyHandler,
}
deleteCmd := &cobra.Command{
Use: "rm MODEL [MODEL...]",
Short: "Remove a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,
RunE: DeleteHandler,
}
envVars := envconfig.AsMap()
envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
for _, cmd := range []*cobra.Command{
createCmd,
showCmd,
runCmd,
pullCmd,
pushCmd,
listCmd,
psCmd,
copyCmd,
deleteCmd,
serveCmd,
} {
switch cmd {
case runCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
case serveCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{
envVars["OLLAMA_DEBUG"],
envVars["OLLAMA_HOST"],
envVars["OLLAMA_KEEP_ALIVE"],
envVars["OLLAMA_MAX_LOADED_MODELS"],
envVars["OLLAMA_MAX_QUEUE"],
envVars["OLLAMA_MODELS"],
envVars["OLLAMA_NUM_PARALLEL"],
envVars["OLLAMA_NOPRUNE"],
envVars["OLLAMA_ORIGINS"],
envVars["OLLAMA_SCHED_SPREAD"],
envVars["OLLAMA_TMPDIR"],
envVars["OLLAMA_FLASH_ATTENTION"],
envVars["OLLAMA_LLM_LIBRARY"],
})
default:
appendEnvDocs(cmd, envs)
}
}
rootCmd.AddCommand(
serveCmd,
createCmd,
showCmd,
runCmd,
pullCmd,
pushCmd,
listCmd,
psCmd,
copyCmd,
deleteCmd,
)
return rootCmd
}
package cmd
import (
"cmp"
"errors"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"regexp"
"slices"
"strings"
"github.com/spf13/cobra"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/readline"
"github.com/ollama/ollama/types/errtypes"
)
type MultilineState int
const (
MultilineNone MultilineState = iota
MultilinePrompt
MultilineSystem
)
func loadModel(cmd *cobra.Command, opts *runOptions) error {
p := progress.NewProgress(os.Stderr)
defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
chatReq := &api.ChatRequest{
Model: opts.Model,
KeepAlive: opts.KeepAlive,
}
return client.Chat(cmd.Context(), chatReq, func(api.ChatResponse) error { return nil })
}
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
usage := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set Set session variables")
fmt.Fprintln(os.Stderr, " /show Show model information")
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
fmt.Fprintln(os.Stderr, " /clear Clear session context")
fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
if opts.MultiModal {
fmt.Fprintf(os.Stderr, "Use %s to include .jpg or .png images.\n", filepath.FromSlash("/path/to/file"))
}
fmt.Fprintln(os.Stderr, "")
}
usageSet := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
fmt.Fprintln(os.Stderr, " /set history Enable history")
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
fmt.Fprintln(os.Stderr, "")
}
usageShortcuts := func() {
fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
fmt.Fprintln(os.Stderr, " Ctrl + a Move to the beginning of the line (Home)")
fmt.Fprintln(os.Stderr, " Ctrl + e Move to the end of the line (End)")
fmt.Fprintln(os.Stderr, " Alt + b Move back (left) one word")
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + w Delete the word before the cursor")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
fmt.Fprintln(os.Stderr, " Ctrl + d Exit ollama (/bye)")
fmt.Fprintln(os.Stderr, "")
}
usageShow := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /show info Show details for this model")
fmt.Fprintln(os.Stderr, " /show license Show model license")
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
fmt.Fprintln(os.Stderr, " /show system Show system message")
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
fmt.Fprintln(os.Stderr, "")
}
// only list out the most common parameters
usageParameters := func() {
fmt.Fprintln(os.Stderr, "Available Parameters:")
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
fmt.Fprintln(os.Stderr, " /set parameter min_p <float> Pick token based on top token probability * min_p")
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
fmt.Fprintln(os.Stderr, " /set parameter stop <string> <string> ... Set the stop parameters")
fmt.Fprintln(os.Stderr, "")
}
scanner, err := readline.New(readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Placeholder: "Send a message (/? for help)",
AltPlaceholder: `Use """ to end multi-line input`,
})
if err != nil {
return err
}
if envconfig.NoHistory() {
scanner.HistoryDisable()
}
fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste)
var sb strings.Builder
var multiline MultilineState
for {
line, err := scanner.Readline()
switch {
case errors.Is(err, io.EOF):
fmt.Println()
return nil
case errors.Is(err, readline.ErrInterrupt):
if line == "" {
fmt.Println("\nUse Ctrl + d or /bye to exit.")
}
scanner.Prompt.UseAlt = false
sb.Reset()
continue
case err != nil:
return err
}
switch {
case multiline != MultilineNone:
// check if there's a multiline terminating string
before, ok := strings.CutSuffix(line, `"""`)
sb.WriteString(before)
if !ok {
fmt.Fprintln(&sb)
continue
}
switch multiline {
case MultilineSystem:
opts.System = sb.String()
opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
fmt.Println("Set system message.")
sb.Reset()
}
multiline = MultilineNone
scanner.Prompt.UseAlt = false
case strings.HasPrefix(line, `"""`):
line := strings.TrimPrefix(line, `"""`)
line, ok := strings.CutSuffix(line, `"""`)
sb.WriteString(line)
if !ok {
// no multiline terminating string; need more input
fmt.Fprintln(&sb)
multiline = MultilinePrompt
scanner.Prompt.UseAlt = true
}
case scanner.Pasting:
fmt.Fprintln(&sb, line)
continue
case strings.HasPrefix(line, "/list"):
args := strings.Fields(line)
if err := ListHandler(cmd, args[1:]); err != nil {
return err
}
case strings.HasPrefix(line, "/load"):
args := strings.Fields(line)
if len(args) != 2 {
fmt.Println("Usage:\n /load <modelname>")
continue
}
opts.Model = args[1]
opts.Messages = []api.Message{}
fmt.Printf("Loading model '%s'\n", opts.Model)
if err := loadModel(cmd, &opts); err != nil {
return err
}
continue
case strings.HasPrefix(line, "/save"):
args := strings.Fields(line)
if len(args) != 2 {
fmt.Println("Usage:\n /save <modelname>")
continue
}
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
req := &api.CreateRequest{
Name: args[1],
Modelfile: buildModelfile(opts),
}
fn := func(resp api.ProgressResponse) error { return nil }
err = client.Create(cmd.Context(), req, fn)
if err != nil {
if strings.Contains(err.Error(), errtypes.InvalidModelNameErrMsg) {
fmt.Printf("error: The model name '%s' is invalid\n", args[1])
continue
}
return err
}
fmt.Printf("Created new model '%s'\n", args[1])
continue
case strings.HasPrefix(line, "/clear"):
opts.Messages = []api.Message{}
if opts.System != "" {
newMessage := api.Message{Role: "system", Content: opts.System}
opts.Messages = append(opts.Messages, newMessage)
}
fmt.Println("Cleared session context")
continue
case strings.HasPrefix(line, "/set"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "history":
scanner.HistoryEnable()
case "nohistory":
scanner.HistoryDisable()
case "wordwrap":
opts.WordWrap = true
fmt.Println("Set 'wordwrap' mode.")
case "nowordwrap":
opts.WordWrap = false
fmt.Println("Set 'nowordwrap' mode.")
case "verbose":
if err := cmd.Flags().Set("verbose", "true"); err != nil {
return err
}
fmt.Println("Set 'verbose' mode.")
case "quiet":
if err := cmd.Flags().Set("verbose", "false"); err != nil {
return err
}
fmt.Println("Set 'quiet' mode.")
case "format":
if len(args) < 3 || args[2] != "json" {
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
} else {
opts.Format = args[2]
fmt.Printf("Set format to '%s' mode.\n", args[2])
}
case "noformat":
opts.Format = ""
fmt.Println("Disabled format.")
case "parameter":
if len(args) < 4 {
usageParameters()
continue
}
params := args[3:]
fp, err := api.FormatParams(map[string][]string{args[2]: params})
if err != nil {
fmt.Printf("Couldn't set parameter: %q\n", err)
continue
}
fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
opts.Options[args[2]] = fp[args[2]]
case "system":
if len(args) < 3 {
usageSet()
continue
}
multiline = MultilineSystem
line := strings.Join(args[2:], " ")
line, ok := strings.CutPrefix(line, `"""`)
if !ok {
multiline = MultilineNone
} else {
// only cut suffix if the line is multiline
line, ok = strings.CutSuffix(line, `"""`)
if ok {
multiline = MultilineNone
}
}
sb.WriteString(line)
if multiline != MultilineNone {
scanner.Prompt.UseAlt = true
continue
}
opts.System = sb.String() // for display in modelfile
newMessage := api.Message{Role: "system", Content: sb.String()}
// Check if the slice is not empty and the last message is from 'system'
if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
// Replace the last message
opts.Messages[len(opts.Messages)-1] = newMessage
} else {
opts.Messages = append(opts.Messages, newMessage)
}
fmt.Println("Set system message.")
sb.Reset()
sb.Reset()
continue
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
} else {
usageSet()
}
case strings.HasPrefix(line, "/show"):
args := strings.Fields(line)
if len(args) > 1 {
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
req := &api.ShowRequest{
Name: opts.Model,
System: opts.System,
Options: opts.Options,
}
resp, err := client.Show(cmd.Context(), req)
if err != nil {
fmt.Println("error: couldn't get model")
return err
}
switch args[1] {
case "info":
showInfo(resp)
case "license":
if resp.License == "" {
fmt.Println("No license was specified for this model.")
} else {
fmt.Println(resp.License)
}
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
if resp.Parameters == "" {
fmt.Println("No parameters were specified for this model.")
} else {
if len(opts.Options) > 0 {
fmt.Println("User defined parameters:")
for k, v := range opts.Options {
fmt.Printf("%-*s %v\n", 30, k, v)
}
fmt.Println()
}
fmt.Println("Model defined parameters:")
fmt.Println(resp.Parameters)
}
case "system":
switch {
case opts.System != "":
fmt.Println(opts.System + "\n")
case resp.System != "":
fmt.Println(resp.System + "\n")
default:
fmt.Println("No system message was specified for this model.")
}
case "template":
if resp.Template != "" {
fmt.Println(resp.Template)
} else {
fmt.Println("No prompt template was specified for this model.")
}
default:
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
} else {
usageShow()
}
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "set", "/set":
usageSet()
case "show", "/show":
usageShow()
case "shortcut", "shortcuts":
usageShortcuts()
}
} else {
usage()
}
case strings.HasPrefix(line, "/exit"), strings.HasPrefix(line, "/bye"):
return nil
case strings.HasPrefix(line, "/"):
args := strings.Fields(line)
isFile := false
if opts.MultiModal {
for _, f := range extractFileNames(line) {
if strings.HasPrefix(f, args[0]) {
isFile = true
break
}
}
}
if !isFile {
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
continue
}
sb.WriteString(line)
default:
sb.WriteString(line)
}
if sb.Len() > 0 && multiline == MultilineNone {
newMessage := api.Message{Role: "user", Content: sb.String()}
if opts.MultiModal {
msg, images, err := extractFileData(sb.String())
if err != nil {
return err
}
// clear all previous images for better responses
if len(images) > 0 {
for i := range opts.Messages {
opts.Messages[i].Images = nil
}
}
newMessage.Content = msg
newMessage.Images = images
}
opts.Messages = append(opts.Messages, newMessage)
assistant, err := chat(cmd, opts)
if err != nil {
return err
}
if assistant != nil {
opts.Messages = append(opts.Messages, *assistant)
}
sb.Reset()
}
}
}
func buildModelfile(opts runOptions) string {
var f parser.File
f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
if opts.System != "" {
f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
}
keys := maps.Keys(opts.Options)
slices.Sort(keys)
for _, k := range keys {
v := opts.Options[k]
var cmds []parser.Command
switch t := v.(type) {
case []string:
for _, s := range t {
cmds = append(cmds, parser.Command{Name: k, Args: s})
}
default:
cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
}
f.Commands = append(f.Commands, cmds...)
}
for _, msg := range opts.Messages {
f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
}
return f.String()
}
func normalizeFilePath(fp string) string {
// Define a map of escaped characters and their replacements
replacements := map[string]string{
"\\ ": " ", // Escaped space
"\\(": "(", // Escaped left parenthesis
"\\)": ")", // Escaped right parenthesis
"\\[": "[", // Escaped left square bracket
"\\]": "]", // Escaped right square bracket
"\\{": "{", // Escaped left curly brace
"\\}": "}", // Escaped right curly brace
"\\$": "$", // Escaped dollar sign
"\\&": "&", // Escaped ampersand
"\\;": ";", // Escaped semicolon
"\\'": "'", // Escaped single quote
"\\\\": "\\", // Escaped backslash
"\\*": "*", // Escaped asterisk
"\\?": "?", // Escaped question mark
}
for escaped, actual := range replacements {
fp = strings.ReplaceAll(fp, escaped, actual)
}
return fp
}
func extractFileNames(input string) []string {
// Regex to match file paths starting with optional drive letter, / ./ \ or .\ and include escaped or unescaped spaces (\ or %20)
// and followed by more characters and a file extension
// This will capture non filename strings, but we'll check for file existence to remove mismatches
regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`
re := regexp.MustCompile(regexPattern)
return re.FindAllString(input, -1)
}
func extractFileData(input string) (string, []api.ImageData, error) {
filePaths := extractFileNames(input)
var imgs []api.ImageData
for _, fp := range filePaths {
nfp := normalizeFilePath(fp)
data, err := getImageData(nfp)
if err != nil {
if os.IsNotExist(err) {
continue
}
fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
return "", imgs, err
}
fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
input = strings.ReplaceAll(input, fp, "")
imgs = append(imgs, data)
}
return input, imgs, nil
}
func getImageData(filePath string) ([]byte, error) {
file, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer file.Close()
buf := make([]byte, 512)
_, err = file.Read(buf)
if err != nil {
return nil, err
}
contentType := http.DetectContentType(buf)
allowedTypes := []string{"image/jpeg", "image/jpg", "image/png"}
if !slices.Contains(allowedTypes, contentType) {
return nil, fmt.Errorf("invalid image type: %s", contentType)
}
info, err := file.Stat()
if err != nil {
return nil, err
}
// Check if the file size exceeds 100MB
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
if info.Size() > maxSize {
return nil, errors.New("file size exceeds maximum limit (100MB)")
}
buf = make([]byte, info.Size())
_, err = file.Seek(0, 0)
if err != nil {
return nil, err
}
_, err = io.ReadFull(file, buf)
if err != nil {
return nil, err
}
return buf, nil
}
package cmd
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/ollama/ollama/api"
)
func TestExtractFilenames(t *testing.T) {
// Unix style paths
input := ` some preamble
./relative\ path/one.png inbetween1 ./not a valid two.jpg inbetween2
/unescaped space /three.jpeg inbetween3 /valid\ path/dir/four.png "./quoted with spaces/five.svg`
res := extractFileNames(input)
assert.Len(t, res, 5)
assert.Contains(t, res[0], "one.png")
assert.Contains(t, res[1], "two.jpg")
assert.Contains(t, res[2], "three.jpeg")
assert.Contains(t, res[3], "four.png")
assert.Contains(t, res[4], "five.svg")
assert.NotContains(t, res[4], '"')
assert.NotContains(t, res, "inbtween")
// Windows style paths
input = ` some preamble
c:/users/jdoe/one.png inbetween1 c:/program files/someplace/two.jpg inbetween2
/absolute/nospace/three.jpeg inbetween3 /absolute/with space/four.png inbetween4
./relative\ path/five.svg inbetween5 "./relative with/spaces/six.png inbetween6
d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
d:\program files\someplace\nine.png inbetween9 "E:\program files\someplace\ten.svg some ending
`
res = extractFileNames(input)
assert.Len(t, res, 10)
assert.NotContains(t, res, "inbtween")
assert.Contains(t, res[0], "one.png")
assert.Contains(t, res[0], "c:")
assert.Contains(t, res[1], "two.jpg")
assert.Contains(t, res[1], "c:")
assert.Contains(t, res[2], "three.jpeg")
assert.Contains(t, res[3], "four.png")
assert.Contains(t, res[4], "five.svg")
assert.Contains(t, res[5], "six.png")
assert.Contains(t, res[6], "seven.svg")
assert.Contains(t, res[6], "d:")
assert.Contains(t, res[7], "eight.png")
assert.Contains(t, res[7], "c:")
assert.Contains(t, res[8], "nine.png")
assert.Contains(t, res[8], "d:")
assert.Contains(t, res[9], "ten.svg")
assert.Contains(t, res[9], "E:")
}
func TestModelfileBuilder(t *testing.T) {
opts := runOptions{
Model: "hork",
System: "You are part horse and part shark, but all hork. Do horklike things",
Messages: []api.Message{
{Role: "user", Content: "Hey there hork!"},
{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
},
Options: map[string]any{
"temperature": 0.9,
"seed": 42,
"penalize_newline": false,
"stop": []string{"hi", "there"},
},
}
t.Run("model", func(t *testing.T) {
expect := `FROM hork
SYSTEM You are part horse and part shark, but all hork. Do horklike things
PARAMETER penalize_newline false
PARAMETER seed 42
PARAMETER stop hi
PARAMETER stop there
PARAMETER temperature 0.9
MESSAGE user Hey there hork!
MESSAGE assistant Yes it is true, I am half horse, half shark.
`
actual := buildModelfile(opts)
if diff := cmp.Diff(expect, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})
t.Run("parent model", func(t *testing.T) {
opts.ParentModel = "horseshark"
expect := `FROM horseshark
SYSTEM You are part horse and part shark, but all hork. Do horklike things
PARAMETER penalize_newline false
PARAMETER seed 42
PARAMETER stop hi
PARAMETER stop there
PARAMETER temperature 0.9
MESSAGE user Hey there hork!
MESSAGE assistant Yes it is true, I am half horse, half shark.
`
actual := buildModelfile(opts)
if diff := cmp.Diff(expect, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})
}
//go:build darwin || windows
package cmd
import (
"context"
"errors"
"time"
"github.com/ollama/ollama/api"
)
func waitForServer(ctx context.Context, client *api.Client) error {
// wait for the server to start
timeout := time.After(5 * time.Second)
tick := time.Tick(500 * time.Millisecond)
for {
select {
case <-timeout:
return errors.New("timed out waiting for server to start")
case <-tick:
if err := client.Heartbeat(ctx); err == nil {
return nil // server has started
}
}
}
}
package cmd
import (
"context"
"errors"
"os"
"os/exec"
"strings"
"github.com/ollama/ollama/api"
)
func startApp(ctx context.Context, client *api.Client) error {
exe, err := os.Executable()
if err != nil {
return err
}
link, err := os.Readlink(exe)
if err != nil {
return err
}
if !strings.Contains(link, "Ollama.app") {
return errors.New("could not find ollama app")
}
path := strings.Split(link, "Ollama.app")
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
return err
}
return waitForServer(ctx, client)
}
//go:build !windows && !darwin
package cmd
import (
"context"
"errors"
"github.com/ollama/ollama/api"
)
func startApp(ctx context.Context, client *api.Client) error {
return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
}
package cmd
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"github.com/ollama/ollama/api"
)
func startApp(ctx context.Context, client *api.Client) error {
// log.Printf("XXX Attempting to find and start ollama app")
AppName := "ollama app.exe"
exe, err := os.Executable()
if err != nil {
return err
}
appExe := filepath.Join(filepath.Dir(exe), AppName)
_, err = os.Stat(appExe)
if errors.Is(err, os.ErrNotExist) {
// Try the standard install location
localAppData := os.Getenv("LOCALAPPDATA")
appExe = filepath.Join(localAppData, "Ollama", AppName)
_, err := os.Stat(appExe)
if errors.Is(err, os.ErrNotExist) {
// Finally look in the path
appExe, err = exec.LookPath(AppName)
if err != nil {
return errors.New("could not locate ollama app")
}
}
}
// log.Printf("XXX attempting to start app %s", appExe)
cmd_path := "c:\\Windows\\system32\\cmd.exe"
cmd := exec.Command(cmd_path, "/c", appExe)
// TODO - these hide flags aren't working - still pops up a command window for some reason
cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: 0x08000000, HideWindow: true}
// TODO this didn't help either...
cmd.Stdin = strings.NewReader("")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
return fmt.Errorf("unable to start ollama app %w", err)
}
if cmd.Process != nil {
defer cmd.Process.Release() //nolint:errcheck
}
return waitForServer(ctx, client)
}
package convert
import (
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"log/slog"
"github.com/ollama/ollama/llm"
)
type Parameters struct {
Architectures []string `json:"architectures"`
VocabSize uint32 `json:"vocab_size"`
}
func (Parameters) KV(t *Tokenizer) llm.KV {
kv := llm.KV{
"general.file_type": uint32(1),
"general.quantization_version": uint32(2),
"tokenizer.ggml.pre": t.Pre,
"tokenizer.ggml.model": t.Vocabulary.Model,
"tokenizer.ggml.tokens": t.Vocabulary.Tokens,
"tokenizer.ggml.scores": t.Vocabulary.Scores,
"tokenizer.ggml.token_type": t.Vocabulary.Types,
}
if t.Template != "" {
kv["tokenizer.chat_template"] = t.Template
}
for _, sv := range t.SpecialVocabulary {
kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
}
return kv
}
func (Parameters) specialTokenTypes() []string {
return []string{
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
}
}
func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
}
type Converter interface {
// KV maps parameters to LLM key-values
KV(*Tokenizer) llm.KV
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
// tensorName returns the LLM tensor name for a specific input name
tensorName(string) string
// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func Convert(fsys fs.FS, ws io.WriteSeeker) error {
bts, err := fs.ReadFile(fsys, "config.json")
if err != nil {
return err
}
var p Parameters
if err := json.Unmarshal(bts, &p); err != nil {
return err
}
if len(p.Architectures) < 1 {
return errors.New("unknown architecture")
}
var conv Converter
switch p.Architectures[0] {
case "LlamaForCausalLM", "MistralForCausalLM":
conv = &llama{}
case "MixtralForCausalLM":
conv = &mixtral{}
case "GemmaForCausalLM":
conv = &gemma{}
default:
return errors.New("unsupported architecture")
}
if err := json.Unmarshal(bts, conv); err != nil {
return err
}
t, err := parseTokenizer(fsys, conv.specialTokenTypes())
if err != nil {
return err
}
if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
for i := range vocabSize - len(t.Vocabulary.Tokens) {
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
}
} else {
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
}
ts, err := parseTensors(fsys)
if err != nil {
return err
}
return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}
package convert
import (
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type gemma struct {
Parameters
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RMSNormEPS float32 `json:"rms_norm_eps"`
HeadDim uint32 `json:"head_dim"`
}
var _ Converter = (*gemma)(nil)
func (p *gemma) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t)
kv["general.architecture"] = "gemma"
kv["general.name"] = "gemma"
kv["gemma.context_length"] = p.MaxPositionEmbeddings
kv["gemma.embedding_length"] = p.HiddenSize
kv["gemma.block_count"] = p.HiddenLayers
kv["gemma.feed_forward_length"] = p.IntermediateSize
kv["gemma.attention.head_count"] = p.NumAttentionHeads
kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["gemma.attention.key_length"] = p.HeadDim
kv["gemma.attention.value_length"] = p.HeadDim
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
return kv
}
func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
name := p.tensorName(t.Name())
if strings.HasSuffix(name, "_norm.weight") {
t.SetRepacker(p.addOne)
}
out = append(out, llm.Tensor{
Name: name,
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *gemma) tensorName(n string) string {
return strings.NewReplacer(
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm",
"block_sparse_moe.gate", "ffn_inp",
).Replace(n)
}
func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
ones := tensor.Ones(tensor.Float32, int(shape[0]))
n, err := n.Add(ones)
if err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 0)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}
package convert
import (
"cmp"
"fmt"
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
)
type llama struct {
Parameters
NLayers uint32 `json:"n_layers"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayer uint32 `json:"n_layer"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
NCtx uint32 `json:"n_ctx"`
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
IntermediateSize uint32 `json:"intermediate_size"`
NInner uint32 `json:"n_inner"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RopeTheta float32 `json:"rope_theta"`
RopeScaling struct {
Type string `json:"type"`
Factor float32 `json:"factor"`
} `json:"rope_scaling"`
RMSNormEPS float32 `json:"rms_norm_eps"`
LayerNormEPS float32 `json:"layer_norm_eps"`
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
NormEpsilon float32 `json:"norm_epsilon"`
HeadDim uint32 `json:"head_dim"`
}
var _ Converter = (*llama)(nil)
func (p *llama) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t)
kv["general.architecture"] = "llama"
kv["general.name"] = "llama"
kv["llama.vocab_size"] = p.VocabSize
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
kv["llama.context_length"] = contextLength
}
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
}
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
}
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
}
if p.RopeTheta > 0 {
kv["llama.rope.freq_base"] = p.RopeTheta
}
if p.RopeScaling.Type == "linear" {
kv["llama.rope.scaling.type"] = p.RopeScaling.Type
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
}
if p.NumKeyValueHeads > 0 {
kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
}
if p.RMSNormEPS > 0 {
kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
}
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
}
if p.HeadDim > 0 {
kv["llama.attention.key_length"] = p.HeadDim
kv["llama.attention.value_length"] = p.HeadDim
}
if len(t.Merges) > 0 {
kv["tokenizer.ggml.merges"] = t.Merges
}
return kv
}
func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
name := p.tensorName(t.Name())
if strings.HasSuffix(name, "attn_q.weight") ||
strings.HasSuffix(name, "attn_k.weight") {
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
Name: name,
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *llama) tensorName(n string) string {
return strings.NewReplacer(
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm",
// mixtral
"block_sparse_moe.gate", "ffn_gate_inp",
).Replace(n)
}
func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
var dims []int
for _, dim := range shape {
dims = append(dims, int(dim))
}
var heads uint32
if strings.HasSuffix(name, "q_proj.weight") {
heads = p.NumAttentionHeads
} else if strings.HasSuffix(name, "k_proj.weight") {
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
} else {
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}
package convert
import (
"fmt"
"io"
"slices"
"strings"
"github.com/ollama/ollama/llm"
)
type mixtral struct {
llama
NumLocalExperts uint32 `json:"num_local_experts"`
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}
var _ Converter = (*mixtral)(nil)
func (p *mixtral) KV(t *Tokenizer) llm.KV {
kv := p.llama.KV(t)
if p.NumLocalExperts > 0 {
kv["llama.expert_count"] = p.NumLocalExperts
}
if p.NumExpertsPerToken > 0 {
kv["llama.expert_used_count"] = p.NumExpertsPerToken
}
return kv
}
func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
oldnew := []string{
"model.layers", "blk",
"w1", "ffn_gate_exps",
"w2", "ffn_down_exps",
"w3", "ffn_up_exps",
}
for i := range p.NumLocalExperts {
oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
}
// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
namer := strings.NewReplacer(oldnew...)
experts := make(map[string]experts)
// merge experts into a single tensor while removing them from ts
ts = slices.DeleteFunc(ts, func(t Tensor) bool {
if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
return false
}
name := namer.Replace(t.Name())
experts[name] = append(experts[name], t)
return true
})
var out []llm.Tensor
for n, e := range experts {
// TODO(mxyng): sanity check experts
out = append(out, llm.Tensor{
Name: n,
Kind: e[0].Kind(),
Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
WriterTo: e,
})
}
return append(out, p.llama.Tensors(ts)...)
}
type experts []Tensor
func (e experts) WriteTo(w io.Writer) (int64, error) {
// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
for _, t := range e {
// the canonical merged experts tensor stacks all experts along a new, 0 axis,
// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
// this accomplishes the same thing by writing each expert tensor in sequence
if _, err := t.WriteTo(w); err != nil {
return 0, err
}
}
return 0, nil
}
package convert
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"io"
"io/fs"
"log/slog"
"math"
"os"
"path/filepath"
"slices"
"testing"
"golang.org/x/exp/maps"
"github.com/ollama/ollama/llm"
)
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "f16")
if err != nil {
t.Fatal(err)
}
defer f.Close()
if err := Convert(fsys, f); err != nil {
t.Fatal(err)
}
r, err := os.Open(f.Name())
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() { r.Close() })
m, _, err := llm.DecodeGGML(r, math.MaxInt)
if err != nil {
t.Fatal(err)
}
if _, err := r.Seek(0, io.SeekStart); err != nil {
t.Fatal(err)
}
return r, m.KV(), m.Tensors()
}
func TestMain(m *testing.M) {
var level slog.Level
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
flag.Parse()
slog.SetLogLoggerLevel(level)
os.Exit(m.Run())
}
func TestConvertFull(t *testing.T) {
cases := []string{
"Meta-Llama-3-8B-Instruct",
"Mistral-7B-Instruct-v0.2",
"Mixtral-8x7B-Instruct-v0.1",
"gemma-2b-it",
}
for i := range cases {
tt := cases[i]
t.Run(tt, func(t *testing.T) {
t.Parallel()
p := filepath.Join("testdata", tt)
if testing.Short() {
t.Skip("skipping in short mode")
} else if _, err := os.Stat(p); err != nil {
t.Skipf("%s not found", p)
}
f, kv, tensors := convertFull(t, os.DirFS(p))
actual := make(map[string]string)
for k, v := range kv {
if s, ok := v.(json.Marshaler); !ok {
actual[k] = fmt.Sprintf("%v", v)
} else {
bts, err := json.Marshal(s)
if err != nil {
t.Fatal(err)
}
actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
}
}
for _, tensor := range tensors.Items {
sha256sum := sha256.New()
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
if _, err := io.Copy(sha256sum, sr); err != nil {
t.Fatal(err)
}
actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
}
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
if err != nil {
t.Fatal(err)
}
var expect map[string]string
if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
t.Fatal(err)
}
keys := maps.Keys(expect)
slices.Sort(keys)
for _, k := range keys {
if v, ok := actual[k]; !ok {
t.Errorf("missing %s", k)
} else if v != expect[k] {
t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
}
}
})
}
}
package convert
import (
"archive/zip"
"errors"
"io"
"io/fs"
"os"
"path/filepath"
)
type ZipReader struct {
r *zip.Reader
p string
// limit is the maximum size of a file that can be read directly
// from the zip archive. Files larger than this size will be extracted
limit int64
}
func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
return &ZipReader{r, p, limit}
}
func (z *ZipReader) Open(name string) (fs.File, error) {
r, err := z.r.Open(name)
if err != nil {
return nil, err
}
defer r.Close()
if fi, err := r.Stat(); err != nil {
return nil, err
} else if fi.Size() < z.limit {
return r, nil
}
if !filepath.IsLocal(name) {
return nil, zip.ErrInsecurePath
}
n := filepath.Join(z.p, name)
if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
w, err := os.Create(n)
if err != nil {
return nil, err
}
defer w.Close()
if _, err := io.Copy(w, r); err != nil {
return nil, err
}
} else if err != nil {
return nil, err
}
return os.Open(n)
}
package convert
import (
"errors"
"io"
"io/fs"
"strings"
)
type Tensor interface {
Name() string
Shape() []uint64
Kind() uint32
SetRepacker(repacker)
WriteTo(io.Writer) (int64, error)
}
type tensorBase struct {
name string
shape []uint64
repacker
}
func (t tensorBase) Name() string {
return t.name
}
func (t tensorBase) Shape() []uint64 {
return t.shape
}
const (
tensorKindF32 uint32 = iota
tensorKindF16
)
func (t tensorBase) Kind() uint32 {
if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") {
return 0
}
switch len(t.shape) {
case 0:
panic("invalid tensor shape")
case 1:
return tensorKindF32
default:
return tensorKindF16
}
}
func (t *tensorBase) SetRepacker(fn repacker) {
t.repacker = fn
}
type repacker func(string, []float32, []uint64) ([]float32, error)
func parseTensors(fsys fs.FS) ([]Tensor, error) {
patterns := []struct {
Pattern string
Func func(fs.FS, ...string) ([]Tensor, error)
}{
{"model-*-of-*.safetensors", parseSafetensors},
{"model.safetensors", parseSafetensors},
{"pytorch_model-*-of-*.bin", parseTorch},
{"pytorch_model.bin", parseTorch},
{"consolidated.*.pth", parseTorch},
}
for _, pattern := range patterns {
matches, err := fs.Glob(fsys, pattern.Pattern)
if err != nil {
return nil, err
}
if len(matches) > 0 {
return pattern.Func(fsys, matches...)
}
}
return nil, errors.New("unknown tensor format")
}
package convert
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"io/fs"
"slices"
"github.com/d4l3k/go-bfloat16"
"github.com/x448/float16"
"golang.org/x/exp/maps"
)
type safetensorMetadata struct {
Type string `json:"dtype"`
Shape []uint64 `json:"shape"`
Offsets []int64 `json:"data_offsets"`
}
func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
var ts []Tensor
for _, p := range ps {
f, err := fsys.Open(p)
if err != nil {
return nil, err
}
defer f.Close()
var n int64
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
return nil, err
}
b := bytes.NewBuffer(make([]byte, 0, n))
if _, err = io.CopyN(b, f, n); err != nil {
return nil, err
}
var headers map[string]safetensorMetadata
if err := json.NewDecoder(b).Decode(&headers); err != nil {
return nil, err
}
keys := maps.Keys(headers)
slices.Sort(keys)
for _, key := range keys {
if value := headers[key]; value.Type != "" {
ts = append(ts, safetensor{
fs: fsys,
path: p,
dtype: value.Type,
offset: safetensorsPad(n, value.Offsets[0]),
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
tensorBase: &tensorBase{
name: key,
shape: value.Shape,
},
})
}
}
}
return ts, nil
}
// safetensorsPad returns the padded size of the safetensors file given a length n and offset s
func safetensorsPad(n, offset int64) int64 {
return 8 + n + offset
}
type safetensor struct {
fs fs.FS
path string
dtype string
offset int64
size int64
*tensorBase
}
func (st safetensor) WriteTo(w io.Writer) (int64, error) {
f, err := st.fs.Open(st.path)
if err != nil {
return 0, err
}
defer f.Close()
if seeker, ok := f.(io.Seeker); ok {
if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
return 0, err
}
} else {
if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
return 0, err
}
}
var f32s []float32
switch st.dtype {
case "F32":
f32s = make([]float32, st.size/4)
if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
return 0, err
}
case "F16":
u16s := make([]uint16, st.size/2)
if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
return 0, err
}
f32s = make([]float32, len(u16s))
for i := range u16s {
f32s[i] = float16.Frombits(u16s[i]).Float32()
}
case "BF16":
u8s := make([]uint8, st.size)
if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
return 0, err
}
f32s = bfloat16.DecodeFloat32(u8s)
default:
return 0, fmt.Errorf("unknown data type: %s", st.dtype)
}
if st.repacker != nil {
f32s, err = st.repacker(st.Name(), f32s, st.Shape())
if err != nil {
return 0, err
}
}
switch st.Kind() {
case tensorKindF32:
return 0, binary.Write(w, binary.LittleEndian, f32s)
case tensorKindF16:
f16s := make([]uint16, len(f32s))
for i := range f32s {
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
}
return 0, binary.Write(w, binary.LittleEndian, f16s)
default:
return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
}
}
package convert
import (
"io"
"io/fs"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types"
)
func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
var ts []Tensor
for _, p := range ps {
pt, err := pytorch.Load(p)
if err != nil {
return nil, err
}
for _, k := range pt.(*types.Dict).Keys() {
t := pt.(*types.Dict).MustGet(k)
var shape []uint64
for dim := range t.(*pytorch.Tensor).Size {
shape = append(shape, uint64(dim))
}
ts = append(ts, torch{
storage: t.(*pytorch.Tensor).Source,
tensorBase: &tensorBase{
name: k.(string),
shape: shape,
},
})
}
}
return ts, nil
}
type torch struct {
storage pytorch.StorageInterface
*tensorBase
}
func (pt torch) WriteTo(w io.Writer) (int64, error) {
return 0, nil
}
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.!
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.32.0
// protoc v4.25.2
// source: sentencepiece_model.proto
package sentencepiece
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// Model type. only have UNIGRAM now.
type TrainerSpec_ModelType int32
const (
TrainerSpec_UNIGRAM TrainerSpec_ModelType = 1 // Unigram language model with dynamic algorithm
TrainerSpec_BPE TrainerSpec_ModelType = 2 // Byte Pair Encoding
TrainerSpec_WORD TrainerSpec_ModelType = 3 // Delimitered by whitespace.
TrainerSpec_CHAR TrainerSpec_ModelType = 4 // tokenizes into character sequence
)
// Enum value maps for TrainerSpec_ModelType.
var (
TrainerSpec_ModelType_name = map[int32]string{
1: "UNIGRAM",
2: "BPE",
3: "WORD",
4: "CHAR",
}
TrainerSpec_ModelType_value = map[string]int32{
"UNIGRAM": 1,
"BPE": 2,
"WORD": 3,
"CHAR": 4,
}
)
func (x TrainerSpec_ModelType) Enum() *TrainerSpec_ModelType {
p := new(TrainerSpec_ModelType)
*p = x
return p
}
func (x TrainerSpec_ModelType) String() string {
return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
}
func (TrainerSpec_ModelType) Descriptor() protoreflect.EnumDescriptor {
return file_sentencepiece_model_proto_enumTypes[0].Descriptor()
}
func (TrainerSpec_ModelType) Type() protoreflect.EnumType {
return &file_sentencepiece_model_proto_enumTypes[0]
}
func (x TrainerSpec_ModelType) Number() protoreflect.EnumNumber {
return protoreflect.EnumNumber(x)
}
// Deprecated: Do not use.
func (x *TrainerSpec_ModelType) UnmarshalJSON(b []byte) error {
num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b)
if err != nil {
return err
}
*x = TrainerSpec_ModelType(num)
return nil
}
// Deprecated: Use TrainerSpec_ModelType.Descriptor instead.
func (TrainerSpec_ModelType) EnumDescriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{0, 0}
}
type ModelProto_SentencePiece_Type int32
const (
ModelProto_SentencePiece_NORMAL ModelProto_SentencePiece_Type = 1 // normal symbol
ModelProto_SentencePiece_UNKNOWN ModelProto_SentencePiece_Type = 2 // unknown symbol. only <unk> for now.
ModelProto_SentencePiece_CONTROL ModelProto_SentencePiece_Type = 3 // control symbols. </s>, <s>, <2ja> etc.
ModelProto_SentencePiece_USER_DEFINED ModelProto_SentencePiece_Type = 4 // user defined symbols.
// Typical usage of USER_DEFINED symbol
// is placeholder.
ModelProto_SentencePiece_BYTE ModelProto_SentencePiece_Type = 6 // byte symbols. Used when `byte_fallback` is true.
ModelProto_SentencePiece_UNUSED ModelProto_SentencePiece_Type = 5 // this piece is not used.
)
// Enum value maps for ModelProto_SentencePiece_Type.
var (
ModelProto_SentencePiece_Type_name = map[int32]string{
1: "NORMAL",
2: "UNKNOWN",
3: "CONTROL",
4: "USER_DEFINED",
6: "BYTE",
5: "UNUSED",
}
ModelProto_SentencePiece_Type_value = map[string]int32{
"NORMAL": 1,
"UNKNOWN": 2,
"CONTROL": 3,
"USER_DEFINED": 4,
"BYTE": 6,
"UNUSED": 5,
}
)
func (x ModelProto_SentencePiece_Type) Enum() *ModelProto_SentencePiece_Type {
p := new(ModelProto_SentencePiece_Type)
*p = x
return p
}
func (x ModelProto_SentencePiece_Type) String() string {
return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
}
func (ModelProto_SentencePiece_Type) Descriptor() protoreflect.EnumDescriptor {
return file_sentencepiece_model_proto_enumTypes[1].Descriptor()
}
func (ModelProto_SentencePiece_Type) Type() protoreflect.EnumType {
return &file_sentencepiece_model_proto_enumTypes[1]
}
func (x ModelProto_SentencePiece_Type) Number() protoreflect.EnumNumber {
return protoreflect.EnumNumber(x)
}
// Deprecated: Do not use.
func (x *ModelProto_SentencePiece_Type) UnmarshalJSON(b []byte) error {
num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b)
if err != nil {
return err
}
*x = ModelProto_SentencePiece_Type(num)
return nil
}
// Deprecated: Use ModelProto_SentencePiece_Type.Descriptor instead.
func (ModelProto_SentencePiece_Type) EnumDescriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{3, 0, 0}
}
// TrainerSpec encodes a various parameters for SentencePiece training.
// Next id: 55
type TrainerSpec struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
extensionFields protoimpl.ExtensionFields
// /////////////////////////////////////////////////////////////////
// General parameters
//
// Input corpus files.
//
// Trainer accepts the following two formats:
// A) Monolingual: plain text, one sentence per line.
// B) Bilingual: TSV, source sentence <tab> target sentence
// When bilingual data is passed, shared vocabulary model is built.
// Note that the input file must be raw corpus, not a preprocessed corpus.
// Trainer only loads the first `input_sentence_size` sentences specified
// with this parameter.
Input []string `protobuf:"bytes,1,rep,name=input" json:"input,omitempty"`
// Input corpus format:
// "text": one-sentence-per-line text format (default)
// "tsv": sentence <tab> freq
InputFormat *string `protobuf:"bytes,7,opt,name=input_format,json=inputFormat" json:"input_format,omitempty"`
// Output model file prefix.
// <model_prefix>.model and <model_prefix>.vocab are generated.
ModelPrefix *string `protobuf:"bytes,2,opt,name=model_prefix,json=modelPrefix" json:"model_prefix,omitempty"`
ModelType *TrainerSpec_ModelType `protobuf:"varint,3,opt,name=model_type,json=modelType,enum=sentencepiece.TrainerSpec_ModelType,def=1" json:"model_type,omitempty"`
// Vocabulary size. 8k is the default size.
VocabSize *int32 `protobuf:"varint,4,opt,name=vocab_size,json=vocabSize,def=8000" json:"vocab_size,omitempty"`
// List of the languages this model can accept.
// Since the model is language-agnostic, this field is used as a reference.
AcceptLanguage []string `protobuf:"bytes,5,rep,name=accept_language,json=acceptLanguage" json:"accept_language,omitempty"`
// Size of self-test samples, which are encoded in the model file.
SelfTestSampleSize *int32 `protobuf:"varint,6,opt,name=self_test_sample_size,json=selfTestSampleSize,def=0" json:"self_test_sample_size,omitempty"`
// Whether to use DP version of sentencepiece. Use it with TSV input format
// (requires precomputed word tab counts to work).
EnableDifferentialPrivacy *bool `protobuf:"varint,50,opt,name=enable_differential_privacy,json=enableDifferentialPrivacy,def=0" json:"enable_differential_privacy,omitempty"`
// Set these parameters if you need DP version of sentencepiece.
// std of noise to add.
DifferentialPrivacyNoiseLevel *float32 `protobuf:"fixed32,51,opt,name=differential_privacy_noise_level,json=differentialPrivacyNoiseLevel,def=0" json:"differential_privacy_noise_level,omitempty"`
// Clipping threshold to apply after adding noise. All the words with
// frequency less than this value are dropped.
DifferentialPrivacyClippingThreshold *uint64 `protobuf:"varint,52,opt,name=differential_privacy_clipping_threshold,json=differentialPrivacyClippingThreshold,def=0" json:"differential_privacy_clipping_threshold,omitempty"`
// /////////////////////////////////////////////////////////////////
// Training parameters.
//
// Uses characters which cover the corpus with the ratio of `chars_coverage`.
// This parameter determines the set of basic Alphabet of sentence piece.
// 1.0 - `chars_coverage` characters are treated as UNK.
// See also required_chars field.
CharacterCoverage *float32 `protobuf:"fixed32,10,opt,name=character_coverage,json=characterCoverage,def=0.9995" json:"character_coverage,omitempty"`
// Maximum size of sentences the trainer loads from `input` parameter.
// Trainer simply loads the `input` files in sequence.
// It is better to shuffle the input corpus randomly.
InputSentenceSize *uint64 `protobuf:"varint,11,opt,name=input_sentence_size,json=inputSentenceSize,def=0" json:"input_sentence_size,omitempty"`
ShuffleInputSentence *bool `protobuf:"varint,19,opt,name=shuffle_input_sentence,json=shuffleInputSentence,def=1" json:"shuffle_input_sentence,omitempty"`
// Maximum size of sentences to make seed sentence pieces.
// Extended suffix array is constructed to extract frequent
// sub-strings from the corpus. This uses 20N working space,
// where N is the size of corpus.
//
// Deprecated: Marked as deprecated in sentencepiece_model.proto.
MiningSentenceSize *int32 `protobuf:"varint,12,opt,name=mining_sentence_size,json=miningSentenceSize" json:"mining_sentence_size,omitempty"`
// Maximum size of sentences to train sentence pieces.
//
// Deprecated: Marked as deprecated in sentencepiece_model.proto.
TrainingSentenceSize *int32 `protobuf:"varint,13,opt,name=training_sentence_size,json=trainingSentenceSize" json:"training_sentence_size,omitempty"`
// The size of seed sentencepieces.
// `seed_sentencepiece_size` must be larger than `vocab_size`.
SeedSentencepieceSize *int32 `protobuf:"varint,14,opt,name=seed_sentencepiece_size,json=seedSentencepieceSize,def=1000000" json:"seed_sentencepiece_size,omitempty"`
// In every EM sub-iterations, keeps top
// `shrinking_factor` * `current sentencepieces size` with respect to
// the loss of the sentence piece. This value should be smaller than 1.0.
ShrinkingFactor *float32 `protobuf:"fixed32,15,opt,name=shrinking_factor,json=shrinkingFactor,def=0.75" json:"shrinking_factor,omitempty"`
// The maximum sentence length in byte. The sentences with the length
// larger than `max_sentence_length` is simply ignored.
// Longer input tends to bring the following risks:
// - Overflow during EM training (unigram language model only)
// - Performance drop because of O(n log n) cost in BPE.
MaxSentenceLength *int32 `protobuf:"varint,18,opt,name=max_sentence_length,json=maxSentenceLength,def=4192" json:"max_sentence_length,omitempty"`
// Number of threads in the training.
NumThreads *int32 `protobuf:"varint,16,opt,name=num_threads,json=numThreads,def=16" json:"num_threads,omitempty"`
// Number of EM sub iterations.
NumSubIterations *int32 `protobuf:"varint,17,opt,name=num_sub_iterations,json=numSubIterations,def=2" json:"num_sub_iterations,omitempty"`
// /////////////////////////////////////////////////////////////////
// SentencePiece parameters which control the shapes of sentence piece.
//
// Maximum length of sentencepiece.
MaxSentencepieceLength *int32 `protobuf:"varint,20,opt,name=max_sentencepiece_length,json=maxSentencepieceLength,def=16" json:"max_sentencepiece_length,omitempty"`
// Uses Unicode script to split sentence pieces.
// When `split_by_unicode_script` is true, we do not allow sentence piece to
// include multiple Unicode scripts, e.g. "F1" is not a valid piece.
// Exception: CJ characters (Hiragana/Katakana/Han) are all handled
// as one script type, since Japanese word can consist of multiple scripts.
// This exception is always applied regardless of the accept-language
// parameter.
SplitByUnicodeScript *bool `protobuf:"varint,21,opt,name=split_by_unicode_script,json=splitByUnicodeScript,def=1" json:"split_by_unicode_script,omitempty"`
// When `split_by_number` is true, put a boundary between number and
// non-number transition. If we want to treat "F1" is one token, set this flag
// to be false.
SplitByNumber *bool `protobuf:"varint,23,opt,name=split_by_number,json=splitByNumber,def=1" json:"split_by_number,omitempty"`
// Use a white space to split sentence pieces.
// When `split_by_whitespace` is false, we may have the piece containing
// a white space in the middle. e.g., "in_the".
SplitByWhitespace *bool `protobuf:"varint,22,opt,name=split_by_whitespace,json=splitByWhitespace,def=1" json:"split_by_whitespace,omitempty"`
// Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
// hello_. When `treat_whitespace_as_suffix` is true,
// NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
// of sentence.
TreatWhitespaceAsSuffix *bool `protobuf:"varint,24,opt,name=treat_whitespace_as_suffix,json=treatWhitespaceAsSuffix,def=0" json:"treat_whitespace_as_suffix,omitempty"`
// Allows pieces that only contain whitespaces instead of appearing only as
// prefix or suffix of other pieces.
AllowWhitespaceOnlyPieces *bool `protobuf:"varint,26,opt,name=allow_whitespace_only_pieces,json=allowWhitespaceOnlyPieces,def=0" json:"allow_whitespace_only_pieces,omitempty"`
// Split all digits (0-9) into separate pieces.
SplitDigits *bool `protobuf:"varint,25,opt,name=split_digits,json=splitDigits,def=0" json:"split_digits,omitempty"`
// Defines the pre-tokenization delimiter.
// When specified, no pieces crossing this delimiter is not included
// in the vocab. Then the delimiter string is virtually ignored
// during the training. This field can allows constraints on the vocabulary
// selection. Note that this field is available on unigram mode.
PretokenizationDelimiter *string `protobuf:"bytes,53,opt,name=pretokenization_delimiter,json=pretokenizationDelimiter,def=" json:"pretokenization_delimiter,omitempty"`
// /////////////////////////////////////////////////////////////////
// Vocabulary management
//
// Defines control symbols used as an indicator to
// change the behavior of the decoder. <s> and </s> are pre-defined.
// We can use this field to encode various meta information,
// including language indicator in multilingual model.
// These symbols are not visible to users, but visible to
// the decoder. Note that when the input sentence contains control symbols,
// they are not treated as one token, but segmented into normal pieces.
// Control symbols must be inserted independently from the segmentation.
ControlSymbols []string `protobuf:"bytes,30,rep,name=control_symbols,json=controlSymbols" json:"control_symbols,omitempty"`
// Defines user defined symbols.
// These symbols are added with extremely high score
// so they are always treated as one unique symbol in any context.
// Typical usage of user_defined_symbols is placeholder for named entities.
UserDefinedSymbols []string `protobuf:"bytes,31,rep,name=user_defined_symbols,json=userDefinedSymbols" json:"user_defined_symbols,omitempty"`
// Defines required characters. Each UTF8 character in this string is included
// in the character set regardless of character_coverage value. Unlike
// user_defined_symbols, these characters have scores based on the frequency
// on input sentences, and the model can form subwords using characters
// in this field.
RequiredChars *string `protobuf:"bytes,36,opt,name=required_chars,json=requiredChars" json:"required_chars,omitempty"`
// Decomposes unknown pieces into UTF-8 bytes.
ByteFallback *bool `protobuf:"varint,35,opt,name=byte_fallback,json=byteFallback,def=0" json:"byte_fallback,omitempty"`
// When creating the vocabulary file, defines whether or not to additionally
// output the score for each piece.
VocabularyOutputPieceScore *bool `protobuf:"varint,32,opt,name=vocabulary_output_piece_score,json=vocabularyOutputPieceScore,def=1" json:"vocabulary_output_piece_score,omitempty"`
// `vocab_size` is treated as hard limit. Crash if
// the model can not produce the vocab of size `vocab_size`,
// When `hard_vocab_limit` is false, vocab_size is treated
// as soft limit. Note that when model_type=char,
// always assumes hard_vocab_limit = false.
HardVocabLimit *bool `protobuf:"varint,33,opt,name=hard_vocab_limit,json=hardVocabLimit,def=1" json:"hard_vocab_limit,omitempty"`
// use all symbols for vocab extraction. This flag is valid
// if model type is either CHAR or WORD
UseAllVocab *bool `protobuf:"varint,34,opt,name=use_all_vocab,json=useAllVocab,def=0" json:"use_all_vocab,omitempty"`
// /////////////////////////////////////////////////////////////////
// Reserved special meta tokens.
// * -1 is not used.
// * unk_id must not be -1.
// Id must starts with 0 and be contigous.
UnkId *int32 `protobuf:"varint,40,opt,name=unk_id,json=unkId,def=0" json:"unk_id,omitempty"` // <unk>
BosId *int32 `protobuf:"varint,41,opt,name=bos_id,json=bosId,def=1" json:"bos_id,omitempty"` // <s>
EosId *int32 `protobuf:"varint,42,opt,name=eos_id,json=eosId,def=2" json:"eos_id,omitempty"` // </s>
PadId *int32 `protobuf:"varint,43,opt,name=pad_id,json=padId,def=-1" json:"pad_id,omitempty"` // <pad> (padding)
UnkPiece *string `protobuf:"bytes,45,opt,name=unk_piece,json=unkPiece,def=<unk>" json:"unk_piece,omitempty"`
BosPiece *string `protobuf:"bytes,46,opt,name=bos_piece,json=bosPiece,def=<s>" json:"bos_piece,omitempty"`
EosPiece *string `protobuf:"bytes,47,opt,name=eos_piece,json=eosPiece,def=</s>" json:"eos_piece,omitempty"`
PadPiece *string `protobuf:"bytes,48,opt,name=pad_piece,json=padPiece,def=<pad>" json:"pad_piece,omitempty"`
// Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
// since this character can be useful both for user and
// developer. We can easily figure out that <unk> is emitted.
UnkSurface *string `protobuf:"bytes,44,opt,name=unk_surface,json=unkSurface,def= ⁇ " json:"unk_surface,omitempty"`
// Increase bit depth to allow unigram model training on large
// (>10M sentences) corpora. A Side-effect of enabling this flag
// is increased memory usage.
TrainExtremelyLargeCorpus *bool `protobuf:"varint,49,opt,name=train_extremely_large_corpus,json=trainExtremelyLargeCorpus,def=0" json:"train_extremely_large_corpus,omitempty"`
// Path to a seed sentencepieces file, with one tab-separated
// seed sentencepiece <tab> frequency per line.
SeedSentencepiecesFile *string `protobuf:"bytes,54,opt,name=seed_sentencepieces_file,json=seedSentencepiecesFile,def=" json:"seed_sentencepieces_file,omitempty"`
}
// Default values for TrainerSpec fields.
const (
Default_TrainerSpec_ModelType = TrainerSpec_UNIGRAM
Default_TrainerSpec_VocabSize = int32(8000)
Default_TrainerSpec_SelfTestSampleSize = int32(0)
Default_TrainerSpec_EnableDifferentialPrivacy = bool(false)
Default_TrainerSpec_DifferentialPrivacyNoiseLevel = float32(0)
Default_TrainerSpec_DifferentialPrivacyClippingThreshold = uint64(0)
Default_TrainerSpec_CharacterCoverage = float32(0.9994999766349792)
Default_TrainerSpec_InputSentenceSize = uint64(0)
Default_TrainerSpec_ShuffleInputSentence = bool(true)
Default_TrainerSpec_SeedSentencepieceSize = int32(1000000)
Default_TrainerSpec_ShrinkingFactor = float32(0.75)
Default_TrainerSpec_MaxSentenceLength = int32(4192)
Default_TrainerSpec_NumThreads = int32(16)
Default_TrainerSpec_NumSubIterations = int32(2)
Default_TrainerSpec_MaxSentencepieceLength = int32(16)
Default_TrainerSpec_SplitByUnicodeScript = bool(true)
Default_TrainerSpec_SplitByNumber = bool(true)
Default_TrainerSpec_SplitByWhitespace = bool(true)
Default_TrainerSpec_TreatWhitespaceAsSuffix = bool(false)
Default_TrainerSpec_AllowWhitespaceOnlyPieces = bool(false)
Default_TrainerSpec_SplitDigits = bool(false)
Default_TrainerSpec_PretokenizationDelimiter = string("")
Default_TrainerSpec_ByteFallback = bool(false)
Default_TrainerSpec_VocabularyOutputPieceScore = bool(true)
Default_TrainerSpec_HardVocabLimit = bool(true)
Default_TrainerSpec_UseAllVocab = bool(false)
Default_TrainerSpec_UnkId = int32(0)
Default_TrainerSpec_BosId = int32(1)
Default_TrainerSpec_EosId = int32(2)
Default_TrainerSpec_PadId = int32(-1)
Default_TrainerSpec_UnkPiece = string("<unk>")
Default_TrainerSpec_BosPiece = string("<s>")
Default_TrainerSpec_EosPiece = string("</s>")
Default_TrainerSpec_PadPiece = string("<pad>")
Default_TrainerSpec_UnkSurface = string(" ⁇ ")
Default_TrainerSpec_TrainExtremelyLargeCorpus = bool(false)
Default_TrainerSpec_SeedSentencepiecesFile = string("")
)
func (x *TrainerSpec) Reset() {
*x = TrainerSpec{}
if protoimpl.UnsafeEnabled {
mi := &file_sentencepiece_model_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *TrainerSpec) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*TrainerSpec) ProtoMessage() {}
func (x *TrainerSpec) ProtoReflect() protoreflect.Message {
mi := &file_sentencepiece_model_proto_msgTypes[0]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use TrainerSpec.ProtoReflect.Descriptor instead.
func (*TrainerSpec) Descriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{0}
}
func (x *TrainerSpec) GetInput() []string {
if x != nil {
return x.Input
}
return nil
}
func (x *TrainerSpec) GetInputFormat() string {
if x != nil && x.InputFormat != nil {
return *x.InputFormat
}
return ""
}
func (x *TrainerSpec) GetModelPrefix() string {
if x != nil && x.ModelPrefix != nil {
return *x.ModelPrefix
}
return ""
}
func (x *TrainerSpec) GetModelType() TrainerSpec_ModelType {
if x != nil && x.ModelType != nil {
return *x.ModelType
}
return Default_TrainerSpec_ModelType
}
func (x *TrainerSpec) GetVocabSize() int32 {
if x != nil && x.VocabSize != nil {
return *x.VocabSize
}
return Default_TrainerSpec_VocabSize
}
func (x *TrainerSpec) GetAcceptLanguage() []string {
if x != nil {
return x.AcceptLanguage
}
return nil
}
func (x *TrainerSpec) GetSelfTestSampleSize() int32 {
if x != nil && x.SelfTestSampleSize != nil {
return *x.SelfTestSampleSize
}
return Default_TrainerSpec_SelfTestSampleSize
}
func (x *TrainerSpec) GetEnableDifferentialPrivacy() bool {
if x != nil && x.EnableDifferentialPrivacy != nil {
return *x.EnableDifferentialPrivacy
}
return Default_TrainerSpec_EnableDifferentialPrivacy
}
func (x *TrainerSpec) GetDifferentialPrivacyNoiseLevel() float32 {
if x != nil && x.DifferentialPrivacyNoiseLevel != nil {
return *x.DifferentialPrivacyNoiseLevel
}
return Default_TrainerSpec_DifferentialPrivacyNoiseLevel
}
func (x *TrainerSpec) GetDifferentialPrivacyClippingThreshold() uint64 {
if x != nil && x.DifferentialPrivacyClippingThreshold != nil {
return *x.DifferentialPrivacyClippingThreshold
}
return Default_TrainerSpec_DifferentialPrivacyClippingThreshold
}
func (x *TrainerSpec) GetCharacterCoverage() float32 {
if x != nil && x.CharacterCoverage != nil {
return *x.CharacterCoverage
}
return Default_TrainerSpec_CharacterCoverage
}
func (x *TrainerSpec) GetInputSentenceSize() uint64 {
if x != nil && x.InputSentenceSize != nil {
return *x.InputSentenceSize
}
return Default_TrainerSpec_InputSentenceSize
}
func (x *TrainerSpec) GetShuffleInputSentence() bool {
if x != nil && x.ShuffleInputSentence != nil {
return *x.ShuffleInputSentence
}
return Default_TrainerSpec_ShuffleInputSentence
}
// Deprecated: Marked as deprecated in sentencepiece_model.proto.
func (x *TrainerSpec) GetMiningSentenceSize() int32 {
if x != nil && x.MiningSentenceSize != nil {
return *x.MiningSentenceSize
}
return 0
}
// Deprecated: Marked as deprecated in sentencepiece_model.proto.
func (x *TrainerSpec) GetTrainingSentenceSize() int32 {
if x != nil && x.TrainingSentenceSize != nil {
return *x.TrainingSentenceSize
}
return 0
}
func (x *TrainerSpec) GetSeedSentencepieceSize() int32 {
if x != nil && x.SeedSentencepieceSize != nil {
return *x.SeedSentencepieceSize
}
return Default_TrainerSpec_SeedSentencepieceSize
}
func (x *TrainerSpec) GetShrinkingFactor() float32 {
if x != nil && x.ShrinkingFactor != nil {
return *x.ShrinkingFactor
}
return Default_TrainerSpec_ShrinkingFactor
}
func (x *TrainerSpec) GetMaxSentenceLength() int32 {
if x != nil && x.MaxSentenceLength != nil {
return *x.MaxSentenceLength
}
return Default_TrainerSpec_MaxSentenceLength
}
func (x *TrainerSpec) GetNumThreads() int32 {
if x != nil && x.NumThreads != nil {
return *x.NumThreads
}
return Default_TrainerSpec_NumThreads
}
func (x *TrainerSpec) GetNumSubIterations() int32 {
if x != nil && x.NumSubIterations != nil {
return *x.NumSubIterations
}
return Default_TrainerSpec_NumSubIterations
}
func (x *TrainerSpec) GetMaxSentencepieceLength() int32 {
if x != nil && x.MaxSentencepieceLength != nil {
return *x.MaxSentencepieceLength
}
return Default_TrainerSpec_MaxSentencepieceLength
}
func (x *TrainerSpec) GetSplitByUnicodeScript() bool {
if x != nil && x.SplitByUnicodeScript != nil {
return *x.SplitByUnicodeScript
}
return Default_TrainerSpec_SplitByUnicodeScript
}
func (x *TrainerSpec) GetSplitByNumber() bool {
if x != nil && x.SplitByNumber != nil {
return *x.SplitByNumber
}
return Default_TrainerSpec_SplitByNumber
}
func (x *TrainerSpec) GetSplitByWhitespace() bool {
if x != nil && x.SplitByWhitespace != nil {
return *x.SplitByWhitespace
}
return Default_TrainerSpec_SplitByWhitespace
}
func (x *TrainerSpec) GetTreatWhitespaceAsSuffix() bool {
if x != nil && x.TreatWhitespaceAsSuffix != nil {
return *x.TreatWhitespaceAsSuffix
}
return Default_TrainerSpec_TreatWhitespaceAsSuffix
}
func (x *TrainerSpec) GetAllowWhitespaceOnlyPieces() bool {
if x != nil && x.AllowWhitespaceOnlyPieces != nil {
return *x.AllowWhitespaceOnlyPieces
}
return Default_TrainerSpec_AllowWhitespaceOnlyPieces
}
func (x *TrainerSpec) GetSplitDigits() bool {
if x != nil && x.SplitDigits != nil {
return *x.SplitDigits
}
return Default_TrainerSpec_SplitDigits
}
func (x *TrainerSpec) GetPretokenizationDelimiter() string {
if x != nil && x.PretokenizationDelimiter != nil {
return *x.PretokenizationDelimiter
}
return Default_TrainerSpec_PretokenizationDelimiter
}
func (x *TrainerSpec) GetControlSymbols() []string {
if x != nil {
return x.ControlSymbols
}
return nil
}
func (x *TrainerSpec) GetUserDefinedSymbols() []string {
if x != nil {
return x.UserDefinedSymbols
}
return nil
}
func (x *TrainerSpec) GetRequiredChars() string {
if x != nil && x.RequiredChars != nil {
return *x.RequiredChars
}
return ""
}
func (x *TrainerSpec) GetByteFallback() bool {
if x != nil && x.ByteFallback != nil {
return *x.ByteFallback
}
return Default_TrainerSpec_ByteFallback
}
func (x *TrainerSpec) GetVocabularyOutputPieceScore() bool {
if x != nil && x.VocabularyOutputPieceScore != nil {
return *x.VocabularyOutputPieceScore
}
return Default_TrainerSpec_VocabularyOutputPieceScore
}
func (x *TrainerSpec) GetHardVocabLimit() bool {
if x != nil && x.HardVocabLimit != nil {
return *x.HardVocabLimit
}
return Default_TrainerSpec_HardVocabLimit
}
func (x *TrainerSpec) GetUseAllVocab() bool {
if x != nil && x.UseAllVocab != nil {
return *x.UseAllVocab
}
return Default_TrainerSpec_UseAllVocab
}
func (x *TrainerSpec) GetUnkId() int32 {
if x != nil && x.UnkId != nil {
return *x.UnkId
}
return Default_TrainerSpec_UnkId
}
func (x *TrainerSpec) GetBosId() int32 {
if x != nil && x.BosId != nil {
return *x.BosId
}
return Default_TrainerSpec_BosId
}
func (x *TrainerSpec) GetEosId() int32 {
if x != nil && x.EosId != nil {
return *x.EosId
}
return Default_TrainerSpec_EosId
}
func (x *TrainerSpec) GetPadId() int32 {
if x != nil && x.PadId != nil {
return *x.PadId
}
return Default_TrainerSpec_PadId
}
func (x *TrainerSpec) GetUnkPiece() string {
if x != nil && x.UnkPiece != nil {
return *x.UnkPiece
}
return Default_TrainerSpec_UnkPiece
}
func (x *TrainerSpec) GetBosPiece() string {
if x != nil && x.BosPiece != nil {
return *x.BosPiece
}
return Default_TrainerSpec_BosPiece
}
func (x *TrainerSpec) GetEosPiece() string {
if x != nil && x.EosPiece != nil {
return *x.EosPiece
}
return Default_TrainerSpec_EosPiece
}
func (x *TrainerSpec) GetPadPiece() string {
if x != nil && x.PadPiece != nil {
return *x.PadPiece
}
return Default_TrainerSpec_PadPiece
}
func (x *TrainerSpec) GetUnkSurface() string {
if x != nil && x.UnkSurface != nil {
return *x.UnkSurface
}
return Default_TrainerSpec_UnkSurface
}
func (x *TrainerSpec) GetTrainExtremelyLargeCorpus() bool {
if x != nil && x.TrainExtremelyLargeCorpus != nil {
return *x.TrainExtremelyLargeCorpus
}
return Default_TrainerSpec_TrainExtremelyLargeCorpus
}
func (x *TrainerSpec) GetSeedSentencepiecesFile() string {
if x != nil && x.SeedSentencepiecesFile != nil {
return *x.SeedSentencepiecesFile
}
return Default_TrainerSpec_SeedSentencepiecesFile
}
// NormalizerSpec encodes a various parameters for string normalizaiton
type NormalizerSpec struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
extensionFields protoimpl.ExtensionFields
// name of normalization rule.
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
// Pre-compiled normalization rule created by
// Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
// Usually this field is set by Builder::GetNormalizerSpec() method.
PrecompiledCharsmap []byte `protobuf:"bytes,2,opt,name=precompiled_charsmap,json=precompiledCharsmap" json:"precompiled_charsmap,omitempty"`
// Adds dummy whitespace at the beginning of text in order to
// treat "world" in "world" and "hello world" in the same way.
AddDummyPrefix *bool `protobuf:"varint,3,opt,name=add_dummy_prefix,json=addDummyPrefix,def=1" json:"add_dummy_prefix,omitempty"`
// Removes leading, trailing, and duplicate internal whitespace.
RemoveExtraWhitespaces *bool `protobuf:"varint,4,opt,name=remove_extra_whitespaces,json=removeExtraWhitespaces,def=1" json:"remove_extra_whitespaces,omitempty"`
// Replaces whitespace with meta symbol.
// This field must be true to train sentence piece model.
EscapeWhitespaces *bool `protobuf:"varint,5,opt,name=escape_whitespaces,json=escapeWhitespaces,def=1" json:"escape_whitespaces,omitempty"`
// Custom normalization rule file in TSV format.
// https://github.com/google/sentencepiece/blob/master/doc/normalization.md
// This field is only used in SentencePieceTrainer::Train() method, which
// compiles the rule into the binary rule stored in `precompiled_charsmap`.
NormalizationRuleTsv *string `protobuf:"bytes,6,opt,name=normalization_rule_tsv,json=normalizationRuleTsv" json:"normalization_rule_tsv,omitempty"`
}
// Default values for NormalizerSpec fields.
const (
Default_NormalizerSpec_AddDummyPrefix = bool(true)
Default_NormalizerSpec_RemoveExtraWhitespaces = bool(true)
Default_NormalizerSpec_EscapeWhitespaces = bool(true)
)
func (x *NormalizerSpec) Reset() {
*x = NormalizerSpec{}
if protoimpl.UnsafeEnabled {
mi := &file_sentencepiece_model_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *NormalizerSpec) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*NormalizerSpec) ProtoMessage() {}
func (x *NormalizerSpec) ProtoReflect() protoreflect.Message {
mi := &file_sentencepiece_model_proto_msgTypes[1]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use NormalizerSpec.ProtoReflect.Descriptor instead.
func (*NormalizerSpec) Descriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{1}
}
func (x *NormalizerSpec) GetName() string {
if x != nil && x.Name != nil {
return *x.Name
}
return ""
}
func (x *NormalizerSpec) GetPrecompiledCharsmap() []byte {
if x != nil {
return x.PrecompiledCharsmap
}
return nil
}
func (x *NormalizerSpec) GetAddDummyPrefix() bool {
if x != nil && x.AddDummyPrefix != nil {
return *x.AddDummyPrefix
}
return Default_NormalizerSpec_AddDummyPrefix
}
func (x *NormalizerSpec) GetRemoveExtraWhitespaces() bool {
if x != nil && x.RemoveExtraWhitespaces != nil {
return *x.RemoveExtraWhitespaces
}
return Default_NormalizerSpec_RemoveExtraWhitespaces
}
func (x *NormalizerSpec) GetEscapeWhitespaces() bool {
if x != nil && x.EscapeWhitespaces != nil {
return *x.EscapeWhitespaces
}
return Default_NormalizerSpec_EscapeWhitespaces
}
func (x *NormalizerSpec) GetNormalizationRuleTsv() string {
if x != nil && x.NormalizationRuleTsv != nil {
return *x.NormalizationRuleTsv
}
return ""
}
// Proto to store samples for self-testing.
type SelfTestData struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
extensionFields protoimpl.ExtensionFields
Samples []*SelfTestData_Sample `protobuf:"bytes,1,rep,name=samples" json:"samples,omitempty"`
}
func (x *SelfTestData) Reset() {
*x = SelfTestData{}
if protoimpl.UnsafeEnabled {
mi := &file_sentencepiece_model_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *SelfTestData) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*SelfTestData) ProtoMessage() {}
func (x *SelfTestData) ProtoReflect() protoreflect.Message {
mi := &file_sentencepiece_model_proto_msgTypes[2]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use SelfTestData.ProtoReflect.Descriptor instead.
func (*SelfTestData) Descriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{2}
}
func (x *SelfTestData) GetSamples() []*SelfTestData_Sample {
if x != nil {
return x.Samples
}
return nil
}
// ModelProto stores model parameters.
// SentencePieceProcessor is supposed to be self-contained.
// All settings/parameters which may change the behavior must be encoded
// in ModelProto.
type ModelProto struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
extensionFields protoimpl.ExtensionFields
// Sentence pieces with scores.
Pieces []*ModelProto_SentencePiece `protobuf:"bytes,1,rep,name=pieces" json:"pieces,omitempty"`
// Spec used to generate this model file.
TrainerSpec *TrainerSpec `protobuf:"bytes,2,opt,name=trainer_spec,json=trainerSpec" json:"trainer_spec,omitempty"`
// Spec for text normalization.
NormalizerSpec *NormalizerSpec `protobuf:"bytes,3,opt,name=normalizer_spec,json=normalizerSpec" json:"normalizer_spec,omitempty"`
// Stores sample input and its expected segmentation to verify the model.
SelfTestData *SelfTestData `protobuf:"bytes,4,opt,name=self_test_data,json=selfTestData" json:"self_test_data,omitempty"`
// Spec for text de-normalization.
DenormalizerSpec *NormalizerSpec `protobuf:"bytes,5,opt,name=denormalizer_spec,json=denormalizerSpec" json:"denormalizer_spec,omitempty"`
}
func (x *ModelProto) Reset() {
*x = ModelProto{}
if protoimpl.UnsafeEnabled {
mi := &file_sentencepiece_model_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ModelProto) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ModelProto) ProtoMessage() {}
func (x *ModelProto) ProtoReflect() protoreflect.Message {
mi := &file_sentencepiece_model_proto_msgTypes[3]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ModelProto.ProtoReflect.Descriptor instead.
func (*ModelProto) Descriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{3}
}
func (x *ModelProto) GetPieces() []*ModelProto_SentencePiece {
if x != nil {
return x.Pieces
}
return nil
}
func (x *ModelProto) GetTrainerSpec() *TrainerSpec {
if x != nil {
return x.TrainerSpec
}
return nil
}
func (x *ModelProto) GetNormalizerSpec() *NormalizerSpec {
if x != nil {
return x.NormalizerSpec
}
return nil
}
func (x *ModelProto) GetSelfTestData() *SelfTestData {
if x != nil {
return x.SelfTestData
}
return nil
}
func (x *ModelProto) GetDenormalizerSpec() *NormalizerSpec {
if x != nil {
return x.DenormalizerSpec
}
return nil
}
type SelfTestData_Sample struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
Input *string `protobuf:"bytes,1,opt,name=input" json:"input,omitempty"`
Expected *string `protobuf:"bytes,2,opt,name=expected" json:"expected,omitempty"`
}
func (x *SelfTestData_Sample) Reset() {
*x = SelfTestData_Sample{}
if protoimpl.UnsafeEnabled {
mi := &file_sentencepiece_model_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *SelfTestData_Sample) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*SelfTestData_Sample) ProtoMessage() {}
func (x *SelfTestData_Sample) ProtoReflect() protoreflect.Message {
mi := &file_sentencepiece_model_proto_msgTypes[4]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use SelfTestData_Sample.ProtoReflect.Descriptor instead.
func (*SelfTestData_Sample) Descriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{2, 0}
}
func (x *SelfTestData_Sample) GetInput() string {
if x != nil && x.Input != nil {
return *x.Input
}
return ""
}
func (x *SelfTestData_Sample) GetExpected() string {
if x != nil && x.Expected != nil {
return *x.Expected
}
return ""
}
type ModelProto_SentencePiece struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
extensionFields protoimpl.ExtensionFields
Piece *string `protobuf:"bytes,1,opt,name=piece" json:"piece,omitempty"` // piece must not be empty.
Score *float32 `protobuf:"fixed32,2,opt,name=score" json:"score,omitempty"`
Type *ModelProto_SentencePiece_Type `protobuf:"varint,3,opt,name=type,enum=sentencepiece.ModelProto_SentencePiece_Type,def=1" json:"type,omitempty"`
}
// Default values for ModelProto_SentencePiece fields.
const (
Default_ModelProto_SentencePiece_Type = ModelProto_SentencePiece_NORMAL
)
func (x *ModelProto_SentencePiece) Reset() {
*x = ModelProto_SentencePiece{}
if protoimpl.UnsafeEnabled {
mi := &file_sentencepiece_model_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ModelProto_SentencePiece) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ModelProto_SentencePiece) ProtoMessage() {}
func (x *ModelProto_SentencePiece) ProtoReflect() protoreflect.Message {
mi := &file_sentencepiece_model_proto_msgTypes[5]
if protoimpl.UnsafeEnabled && x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ModelProto_SentencePiece.ProtoReflect.Descriptor instead.
func (*ModelProto_SentencePiece) Descriptor() ([]byte, []int) {
return file_sentencepiece_model_proto_rawDescGZIP(), []int{3, 0}
}
func (x *ModelProto_SentencePiece) GetPiece() string {
if x != nil && x.Piece != nil {
return *x.Piece
}
return ""
}
func (x *ModelProto_SentencePiece) GetScore() float32 {
if x != nil && x.Score != nil {
return *x.Score
}
return 0
}
func (x *ModelProto_SentencePiece) GetType() ModelProto_SentencePiece_Type {
if x != nil && x.Type != nil {
return *x.Type
}
return Default_ModelProto_SentencePiece_Type
}
var File_sentencepiece_model_proto protoreflect.FileDescriptor
var file_sentencepiece_model_proto_rawDesc = []byte{
0x0a, 0x19, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x5f,
0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x73, 0x65, 0x6e,
0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x22, 0xc6, 0x12, 0x0a, 0x0b, 0x54,
0x72, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e,
0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x05, 0x69, 0x6e, 0x70, 0x75, 0x74,
0x12, 0x21, 0x0a, 0x0c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74,
0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x46, 0x6f, 0x72,
0x6d, 0x61, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, 0x70, 0x72, 0x65,
0x66, 0x69, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
0x50, 0x72, 0x65, 0x66, 0x69, 0x78, 0x12, 0x4c, 0x0a, 0x0a, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f,
0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x73, 0x65, 0x6e,
0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x54, 0x72, 0x61, 0x69, 0x6e,
0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x54, 0x79, 0x70, 0x65,
0x3a, 0x07, 0x55, 0x4e, 0x49, 0x47, 0x52, 0x41, 0x4d, 0x52, 0x09, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
0x54, 0x79, 0x70, 0x65, 0x12, 0x23, 0x0a, 0x0a, 0x76, 0x6f, 0x63, 0x61, 0x62, 0x5f, 0x73, 0x69,
0x7a, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x04, 0x38, 0x30, 0x30, 0x30, 0x52, 0x09,
0x76, 0x6f, 0x63, 0x61, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x61, 0x63, 0x63,
0x65, 0x70, 0x74, 0x5f, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x05, 0x20, 0x03,
0x28, 0x09, 0x52, 0x0e, 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61,
0x67, 0x65, 0x12, 0x34, 0x0a, 0x15, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f,
0x73, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28,
0x05, 0x3a, 0x01, 0x30, 0x52, 0x12, 0x73, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74, 0x53, 0x61,
0x6d, 0x70, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x45, 0x0a, 0x1b, 0x65, 0x6e, 0x61, 0x62,
0x6c, 0x65, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f,
0x70, 0x72, 0x69, 0x76, 0x61, 0x63, 0x79, 0x18, 0x32, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66,
0x61, 0x6c, 0x73, 0x65, 0x52, 0x19, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x44, 0x69, 0x66, 0x66,
0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x69, 0x76, 0x61, 0x63, 0x79, 0x12,
0x4a, 0x0a, 0x20, 0x64, 0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f,
0x70, 0x72, 0x69, 0x76, 0x61, 0x63, 0x79, 0x5f, 0x6e, 0x6f, 0x69, 0x73, 0x65, 0x5f, 0x6c, 0x65,
0x76, 0x65, 0x6c, 0x18, 0x33, 0x20, 0x01, 0x28, 0x02, 0x3a, 0x01, 0x30, 0x52, 0x1d, 0x64, 0x69,
0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x69, 0x76, 0x61, 0x63,
0x79, 0x4e, 0x6f, 0x69, 0x73, 0x65, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x58, 0x0a, 0x27, 0x64,
0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x70, 0x72, 0x69, 0x76,
0x61, 0x63, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x5f, 0x74, 0x68, 0x72,
0x65, 0x73, 0x68, 0x6f, 0x6c, 0x64, 0x18, 0x34, 0x20, 0x01, 0x28, 0x04, 0x3a, 0x01, 0x30, 0x52,
0x24, 0x64, 0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x69,
0x76, 0x61, 0x63, 0x79, 0x43, 0x6c, 0x69, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x54, 0x68, 0x72, 0x65,
0x73, 0x68, 0x6f, 0x6c, 0x64, 0x12, 0x35, 0x0a, 0x12, 0x63, 0x68, 0x61, 0x72, 0x61, 0x63, 0x74,
0x65, 0x72, 0x5f, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x67, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28,
0x02, 0x3a, 0x06, 0x30, 0x2e, 0x39, 0x39, 0x39, 0x35, 0x52, 0x11, 0x63, 0x68, 0x61, 0x72, 0x61,
0x63, 0x74, 0x65, 0x72, 0x43, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x67, 0x65, 0x12, 0x31, 0x0a, 0x13,
0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x73,
0x69, 0x7a, 0x65, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x04, 0x3a, 0x01, 0x30, 0x52, 0x11, 0x69, 0x6e,
0x70, 0x75, 0x74, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12,
0x3a, 0x0a, 0x16, 0x73, 0x68, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74,
0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x18, 0x13, 0x20, 0x01, 0x28, 0x08, 0x3a,
0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x14, 0x73, 0x68, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x49, 0x6e,
0x70, 0x75, 0x74, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x34, 0x0a, 0x14, 0x6d,
0x69, 0x6e, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x73,
0x69, 0x7a, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x42, 0x02, 0x18, 0x01, 0x52, 0x12, 0x6d,
0x69, 0x6e, 0x69, 0x6e, 0x67, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x69, 0x7a,
0x65, 0x12, 0x38, 0x0a, 0x16, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x65,
0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28,
0x05, 0x42, 0x02, 0x18, 0x01, 0x52, 0x14, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x53,
0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x3f, 0x0a, 0x17, 0x73,
0x65, 0x65, 0x64, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63,
0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x07, 0x31, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x52, 0x15, 0x73, 0x65, 0x65, 0x64, 0x53, 0x65, 0x6e, 0x74, 0x65,
0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x2f, 0x0a, 0x10,
0x73, 0x68, 0x72, 0x69, 0x6e, 0x6b, 0x69, 0x6e, 0x67, 0x5f, 0x66, 0x61, 0x63, 0x74, 0x6f, 0x72,
0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x3a, 0x04, 0x30, 0x2e, 0x37, 0x35, 0x52, 0x0f, 0x73, 0x68,
0x72, 0x69, 0x6e, 0x6b, 0x69, 0x6e, 0x67, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x34, 0x0a,
0x13, 0x6d, 0x61, 0x78, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x6c, 0x65,
0x6e, 0x67, 0x74, 0x68, 0x18, 0x12, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x04, 0x34, 0x31, 0x39, 0x32,
0x52, 0x11, 0x6d, 0x61, 0x78, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x4c, 0x65, 0x6e,
0x67, 0x74, 0x68, 0x12, 0x23, 0x0a, 0x0b, 0x6e, 0x75, 0x6d, 0x5f, 0x74, 0x68, 0x72, 0x65, 0x61,
0x64, 0x73, 0x18, 0x10, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, 0x31, 0x36, 0x52, 0x0a, 0x6e, 0x75,
0x6d, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2f, 0x0a, 0x12, 0x6e, 0x75, 0x6d, 0x5f,
0x73, 0x75, 0x62, 0x5f, 0x69, 0x74, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x11,
0x20, 0x01, 0x28, 0x05, 0x3a, 0x01, 0x32, 0x52, 0x10, 0x6e, 0x75, 0x6d, 0x53, 0x75, 0x62, 0x49,
0x74, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3c, 0x0a, 0x18, 0x6d, 0x61, 0x78,
0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x5f, 0x6c,
0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, 0x31, 0x36, 0x52,
0x16, 0x6d, 0x61, 0x78, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63,
0x65, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x3b, 0x0a, 0x17, 0x73, 0x70, 0x6c, 0x69, 0x74,
0x5f, 0x62, 0x79, 0x5f, 0x75, 0x6e, 0x69, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x73, 0x63, 0x72, 0x69,
0x70, 0x74, 0x18, 0x15, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x14,
0x73, 0x70, 0x6c, 0x69, 0x74, 0x42, 0x79, 0x55, 0x6e, 0x69, 0x63, 0x6f, 0x64, 0x65, 0x53, 0x63,
0x72, 0x69, 0x70, 0x74, 0x12, 0x2c, 0x0a, 0x0f, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x62, 0x79,
0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74,
0x72, 0x75, 0x65, 0x52, 0x0d, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x42, 0x79, 0x4e, 0x75, 0x6d, 0x62,
0x65, 0x72, 0x12, 0x34, 0x0a, 0x13, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x62, 0x79, 0x5f, 0x77,
0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x3a,
0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x11, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x42, 0x79, 0x57, 0x68,
0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x42, 0x0a, 0x1a, 0x74, 0x72, 0x65, 0x61,
0x74, 0x5f, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x61, 0x73, 0x5f,
0x73, 0x75, 0x66, 0x66, 0x69, 0x78, 0x18, 0x18, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61,
0x6c, 0x73, 0x65, 0x52, 0x17, 0x74, 0x72, 0x65, 0x61, 0x74, 0x57, 0x68, 0x69, 0x74, 0x65, 0x73,
0x70, 0x61, 0x63, 0x65, 0x41, 0x73, 0x53, 0x75, 0x66, 0x66, 0x69, 0x78, 0x12, 0x46, 0x0a, 0x1c,
0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x5f, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65,
0x5f, 0x6f, 0x6e, 0x6c, 0x79, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x18, 0x1a, 0x20, 0x01,
0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x19, 0x61, 0x6c, 0x6c, 0x6f, 0x77,
0x57, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x4f, 0x6e, 0x6c, 0x79, 0x50, 0x69,
0x65, 0x63, 0x65, 0x73, 0x12, 0x28, 0x0a, 0x0c, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x64, 0x69,
0x67, 0x69, 0x74, 0x73, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73,
0x65, 0x52, 0x0b, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x44, 0x69, 0x67, 0x69, 0x74, 0x73, 0x12, 0x3d,
0x0a, 0x19, 0x70, 0x72, 0x65, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f,
0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x65, 0x72, 0x18, 0x35, 0x20, 0x01, 0x28,
0x09, 0x3a, 0x00, 0x52, 0x18, 0x70, 0x72, 0x65, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61,
0x74, 0x69, 0x6f, 0x6e, 0x44, 0x65, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x65, 0x72, 0x12, 0x27, 0x0a,
0x0f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73,
0x18, 0x1e, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0e, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x53,
0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x30, 0x0a, 0x14, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x64,
0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x18, 0x1f,
0x20, 0x03, 0x28, 0x09, 0x52, 0x12, 0x75, 0x73, 0x65, 0x72, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65,
0x64, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x72, 0x65, 0x71, 0x75,
0x69, 0x72, 0x65, 0x64, 0x5f, 0x63, 0x68, 0x61, 0x72, 0x73, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09,
0x52, 0x0d, 0x72, 0x65, 0x71, 0x75, 0x69, 0x72, 0x65, 0x64, 0x43, 0x68, 0x61, 0x72, 0x73, 0x12,
0x2a, 0x0a, 0x0d, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x66, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b,
0x18, 0x23, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0c, 0x62,
0x79, 0x74, 0x65, 0x46, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x12, 0x47, 0x0a, 0x1d, 0x76,
0x6f, 0x63, 0x61, 0x62, 0x75, 0x6c, 0x61, 0x72, 0x79, 0x5f, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74,
0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x5f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x20, 0x20, 0x01,
0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x1a, 0x76, 0x6f, 0x63, 0x61, 0x62, 0x75,
0x6c, 0x61, 0x72, 0x79, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x50, 0x69, 0x65, 0x63, 0x65, 0x53,
0x63, 0x6f, 0x72, 0x65, 0x12, 0x2e, 0x0a, 0x10, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x76, 0x6f, 0x63,
0x61, 0x62, 0x5f, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x21, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04,
0x74, 0x72, 0x75, 0x65, 0x52, 0x0e, 0x68, 0x61, 0x72, 0x64, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4c,
0x69, 0x6d, 0x69, 0x74, 0x12, 0x29, 0x0a, 0x0d, 0x75, 0x73, 0x65, 0x5f, 0x61, 0x6c, 0x6c, 0x5f,
0x76, 0x6f, 0x63, 0x61, 0x62, 0x18, 0x22, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c,
0x73, 0x65, 0x52, 0x0b, 0x75, 0x73, 0x65, 0x41, 0x6c, 0x6c, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x12,
0x18, 0x0a, 0x06, 0x75, 0x6e, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x28, 0x20, 0x01, 0x28, 0x05, 0x3a,
0x01, 0x30, 0x52, 0x05, 0x75, 0x6e, 0x6b, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x06, 0x62, 0x6f, 0x73,
0x5f, 0x69, 0x64, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x01, 0x31, 0x52, 0x05, 0x62, 0x6f,
0x73, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x06, 0x65, 0x6f, 0x73, 0x5f, 0x69, 0x64, 0x18, 0x2a, 0x20,
0x01, 0x28, 0x05, 0x3a, 0x01, 0x32, 0x52, 0x05, 0x65, 0x6f, 0x73, 0x49, 0x64, 0x12, 0x19, 0x0a,
0x06, 0x70, 0x61, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, 0x2d,
0x31, 0x52, 0x05, 0x70, 0x61, 0x64, 0x49, 0x64, 0x12, 0x22, 0x0a, 0x09, 0x75, 0x6e, 0x6b, 0x5f,
0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x09, 0x3a, 0x05, 0x3c, 0x75, 0x6e,
0x6b, 0x3e, 0x52, 0x08, 0x75, 0x6e, 0x6b, 0x50, 0x69, 0x65, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x09,
0x62, 0x6f, 0x73, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x09, 0x3a,
0x03, 0x3c, 0x73, 0x3e, 0x52, 0x08, 0x62, 0x6f, 0x73, 0x50, 0x69, 0x65, 0x63, 0x65, 0x12, 0x21,
0x0a, 0x09, 0x65, 0x6f, 0x73, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x2f, 0x20, 0x01, 0x28,
0x09, 0x3a, 0x04, 0x3c, 0x2f, 0x73, 0x3e, 0x52, 0x08, 0x65, 0x6f, 0x73, 0x50, 0x69, 0x65, 0x63,
0x65, 0x12, 0x22, 0x0a, 0x09, 0x70, 0x61, 0x64, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x30,
0x20, 0x01, 0x28, 0x09, 0x3a, 0x05, 0x3c, 0x70, 0x61, 0x64, 0x3e, 0x52, 0x08, 0x70, 0x61, 0x64,
0x50, 0x69, 0x65, 0x63, 0x65, 0x12, 0x26, 0x0a, 0x0b, 0x75, 0x6e, 0x6b, 0x5f, 0x73, 0x75, 0x72,
0x66, 0x61, 0x63, 0x65, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x09, 0x3a, 0x05, 0x20, 0xe2, 0x81, 0x87,
0x20, 0x52, 0x0a, 0x75, 0x6e, 0x6b, 0x53, 0x75, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x46, 0x0a,
0x1c, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x5f, 0x65, 0x78, 0x74, 0x72, 0x65, 0x6d, 0x65, 0x6c, 0x79,
0x5f, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x5f, 0x63, 0x6f, 0x72, 0x70, 0x75, 0x73, 0x18, 0x31, 0x20,
0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x19, 0x74, 0x72, 0x61, 0x69,
0x6e, 0x45, 0x78, 0x74, 0x72, 0x65, 0x6d, 0x65, 0x6c, 0x79, 0x4c, 0x61, 0x72, 0x67, 0x65, 0x43,
0x6f, 0x72, 0x70, 0x75, 0x73, 0x12, 0x3a, 0x0a, 0x18, 0x73, 0x65, 0x65, 0x64, 0x5f, 0x73, 0x65,
0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x5f, 0x66, 0x69, 0x6c,
0x65, 0x18, 0x36, 0x20, 0x01, 0x28, 0x09, 0x3a, 0x00, 0x52, 0x16, 0x73, 0x65, 0x65, 0x64, 0x53,
0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x46, 0x69, 0x6c,
0x65, 0x22, 0x35, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
0x0a, 0x07, 0x55, 0x4e, 0x49, 0x47, 0x52, 0x41, 0x4d, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x42,
0x50, 0x45, 0x10, 0x02, 0x12, 0x08, 0x0a, 0x04, 0x57, 0x4f, 0x52, 0x44, 0x10, 0x03, 0x12, 0x08,
0x0a, 0x04, 0x43, 0x48, 0x41, 0x52, 0x10, 0x04, 0x2a, 0x09, 0x08, 0xc8, 0x01, 0x10, 0x80, 0x80,
0x80, 0x80, 0x02, 0x22, 0xbd, 0x02, 0x0a, 0x0e, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a,
0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01,
0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x31, 0x0a, 0x14, 0x70, 0x72,
0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x68, 0x61, 0x72, 0x73, 0x6d,
0x61, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x13, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d,
0x70, 0x69, 0x6c, 0x65, 0x64, 0x43, 0x68, 0x61, 0x72, 0x73, 0x6d, 0x61, 0x70, 0x12, 0x2e, 0x0a,
0x10, 0x61, 0x64, 0x64, 0x5f, 0x64, 0x75, 0x6d, 0x6d, 0x79, 0x5f, 0x70, 0x72, 0x65, 0x66, 0x69,
0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x0e, 0x61,
0x64, 0x64, 0x44, 0x75, 0x6d, 0x6d, 0x79, 0x50, 0x72, 0x65, 0x66, 0x69, 0x78, 0x12, 0x3e, 0x0a,
0x18, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x5f, 0x65, 0x78, 0x74, 0x72, 0x61, 0x5f, 0x77, 0x68,
0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x3a,
0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x16, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x45, 0x78, 0x74,
0x72, 0x61, 0x57, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x12, 0x33, 0x0a,
0x12, 0x65, 0x73, 0x63, 0x61, 0x70, 0x65, 0x5f, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61,
0x63, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52,
0x11, 0x65, 0x73, 0x63, 0x61, 0x70, 0x65, 0x57, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63,
0x65, 0x73, 0x12, 0x34, 0x0a, 0x16, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x61, 0x74,
0x69, 0x6f, 0x6e, 0x5f, 0x72, 0x75, 0x6c, 0x65, 0x5f, 0x74, 0x73, 0x76, 0x18, 0x06, 0x20, 0x01,
0x28, 0x09, 0x52, 0x14, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f,
0x6e, 0x52, 0x75, 0x6c, 0x65, 0x54, 0x73, 0x76, 0x2a, 0x09, 0x08, 0xc8, 0x01, 0x10, 0x80, 0x80,
0x80, 0x80, 0x02, 0x22, 0x93, 0x01, 0x0a, 0x0c, 0x53, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74,
0x44, 0x61, 0x74, 0x61, 0x12, 0x3c, 0x0a, 0x07, 0x73, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x73, 0x18,
0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65,
0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x53, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74, 0x44, 0x61,
0x74, 0x61, 0x2e, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x52, 0x07, 0x73, 0x61, 0x6d, 0x70, 0x6c,
0x65, 0x73, 0x1a, 0x3a, 0x0a, 0x06, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x12, 0x14, 0x0a, 0x05,
0x69, 0x6e, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x69, 0x6e, 0x70,
0x75, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x65, 0x78, 0x70, 0x65, 0x63, 0x74, 0x65, 0x64, 0x18, 0x02,
0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x78, 0x70, 0x65, 0x63, 0x74, 0x65, 0x64, 0x2a, 0x09,
0x08, 0xc8, 0x01, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0xd7, 0x04, 0x0a, 0x0a, 0x4d, 0x6f,
0x64, 0x65, 0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x3f, 0x0a, 0x06, 0x70, 0x69, 0x65, 0x63,
0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65,
0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x50, 0x72,
0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x69, 0x65, 0x63,
0x65, 0x52, 0x06, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x12, 0x3d, 0x0a, 0x0c, 0x74, 0x72, 0x61,
0x69, 0x6e, 0x65, 0x72, 0x5f, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32,
0x1a, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e,
0x54, 0x72, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x52, 0x0b, 0x74, 0x72, 0x61,
0x69, 0x6e, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x12, 0x46, 0x0a, 0x0f, 0x6e, 0x6f, 0x72, 0x6d,
0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x70, 0x65, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28,
0x0b, 0x32, 0x1d, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63,
0x65, 0x2e, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63,
0x52, 0x0e, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63,
0x12, 0x41, 0x0a, 0x0e, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x64, 0x61,
0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65,
0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x53, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73,
0x74, 0x44, 0x61, 0x74, 0x61, 0x52, 0x0c, 0x73, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74, 0x44,
0x61, 0x74, 0x61, 0x12, 0x4a, 0x0a, 0x11, 0x64, 0x65, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69,
0x7a, 0x65, 0x72, 0x5f, 0x73, 0x70, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d,
0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x4e,
0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x52, 0x10, 0x64,
0x65, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x1a,
0xe6, 0x01, 0x0a, 0x0d, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x69, 0x65, 0x63,
0x65, 0x12, 0x14, 0x0a, 0x05, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
0x52, 0x05, 0x70, 0x69, 0x65, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65,
0x18, 0x02, 0x20, 0x01, 0x28, 0x02, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x48, 0x0a,
0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2c, 0x2e, 0x73, 0x65,
0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x4d, 0x6f, 0x64, 0x65,
0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x50,
0x69, 0x65, 0x63, 0x65, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x3a, 0x06, 0x4e, 0x4f, 0x52, 0x4d, 0x41,
0x4c, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x54, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12,
0x0a, 0x0a, 0x06, 0x4e, 0x4f, 0x52, 0x4d, 0x41, 0x4c, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x55,
0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x4f, 0x4e, 0x54,
0x52, 0x4f, 0x4c, 0x10, 0x03, 0x12, 0x10, 0x0a, 0x0c, 0x55, 0x53, 0x45, 0x52, 0x5f, 0x44, 0x45,
0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x04, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x59, 0x54, 0x45, 0x10,
0x06, 0x12, 0x0a, 0x0a, 0x06, 0x55, 0x4e, 0x55, 0x53, 0x45, 0x44, 0x10, 0x05, 0x2a, 0x09, 0x08,
0xc8, 0x01, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x2a, 0x09, 0x08, 0xc8, 0x01, 0x10, 0x80, 0x80,
0x80, 0x80, 0x02, 0x42, 0x13, 0x48, 0x03, 0x5a, 0x0f, 0x2e, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x65,
0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65,
}
var (
file_sentencepiece_model_proto_rawDescOnce sync.Once
file_sentencepiece_model_proto_rawDescData = file_sentencepiece_model_proto_rawDesc
)
func file_sentencepiece_model_proto_rawDescGZIP() []byte {
file_sentencepiece_model_proto_rawDescOnce.Do(func() {
file_sentencepiece_model_proto_rawDescData = protoimpl.X.CompressGZIP(file_sentencepiece_model_proto_rawDescData)
})
return file_sentencepiece_model_proto_rawDescData
}
var file_sentencepiece_model_proto_enumTypes = make([]protoimpl.EnumInfo, 2)
var file_sentencepiece_model_proto_msgTypes = make([]protoimpl.MessageInfo, 6)
var file_sentencepiece_model_proto_goTypes = []interface{}{
(TrainerSpec_ModelType)(0), // 0: sentencepiece.TrainerSpec.ModelType
(ModelProto_SentencePiece_Type)(0), // 1: sentencepiece.ModelProto.SentencePiece.Type
(*TrainerSpec)(nil), // 2: sentencepiece.TrainerSpec
(*NormalizerSpec)(nil), // 3: sentencepiece.NormalizerSpec
(*SelfTestData)(nil), // 4: sentencepiece.SelfTestData
(*ModelProto)(nil), // 5: sentencepiece.ModelProto
(*SelfTestData_Sample)(nil), // 6: sentencepiece.SelfTestData.Sample
(*ModelProto_SentencePiece)(nil), // 7: sentencepiece.ModelProto.SentencePiece
}
var file_sentencepiece_model_proto_depIdxs = []int32{
0, // 0: sentencepiece.TrainerSpec.model_type:type_name -> sentencepiece.TrainerSpec.ModelType
6, // 1: sentencepiece.SelfTestData.samples:type_name -> sentencepiece.SelfTestData.Sample
7, // 2: sentencepiece.ModelProto.pieces:type_name -> sentencepiece.ModelProto.SentencePiece
2, // 3: sentencepiece.ModelProto.trainer_spec:type_name -> sentencepiece.TrainerSpec
3, // 4: sentencepiece.ModelProto.normalizer_spec:type_name -> sentencepiece.NormalizerSpec
4, // 5: sentencepiece.ModelProto.self_test_data:type_name -> sentencepiece.SelfTestData
3, // 6: sentencepiece.ModelProto.denormalizer_spec:type_name -> sentencepiece.NormalizerSpec
1, // 7: sentencepiece.ModelProto.SentencePiece.type:type_name -> sentencepiece.ModelProto.SentencePiece.Type
8, // [8:8] is the sub-list for method output_type
8, // [8:8] is the sub-list for method input_type
8, // [8:8] is the sub-list for extension type_name
8, // [8:8] is the sub-list for extension extendee
0, // [0:8] is the sub-list for field type_name
}
func init() { file_sentencepiece_model_proto_init() }
func file_sentencepiece_model_proto_init() {
if File_sentencepiece_model_proto != nil {
return
}
if !protoimpl.UnsafeEnabled {
file_sentencepiece_model_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*TrainerSpec); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
case 3:
return &v.extensionFields
default:
return nil
}
}
file_sentencepiece_model_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*NormalizerSpec); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
case 3:
return &v.extensionFields
default:
return nil
}
}
file_sentencepiece_model_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*SelfTestData); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
case 3:
return &v.extensionFields
default:
return nil
}
}
file_sentencepiece_model_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ModelProto); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
case 3:
return &v.extensionFields
default:
return nil
}
}
file_sentencepiece_model_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*SelfTestData_Sample); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_sentencepiece_model_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ModelProto_SentencePiece); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
case 3:
return &v.extensionFields
default:
return nil
}
}
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_sentencepiece_model_proto_rawDesc,
NumEnums: 2,
NumMessages: 6,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_sentencepiece_model_proto_goTypes,
DependencyIndexes: file_sentencepiece_model_proto_depIdxs,
EnumInfos: file_sentencepiece_model_proto_enumTypes,
MessageInfos: file_sentencepiece_model_proto_msgTypes,
}.Build()
File_sentencepiece_model_proto = out.File
file_sentencepiece_model_proto_rawDesc = nil
file_sentencepiece_model_proto_goTypes = nil
file_sentencepiece_model_proto_depIdxs = nil
}
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.!
syntax = "proto2";
// TODO(taku): Needs to use LITE RUNTIME in OSS release.
option optimize_for = LITE_RUNTIME;
option go_package = "./sentencepiece";
package sentencepiece;
// TrainerSpec encodes a various parameters for SentencePiece training.
// Next id: 55
message TrainerSpec {
///////////////////////////////////////////////////////////////////
// General parameters
//
// Input corpus files.
// Trainer accepts the following two formats:
// A) Monolingual: plain text, one sentence per line.
// B) Bilingual: TSV, source sentence <tab> target sentence
// When bilingual data is passed, shared vocabulary model is built.
// Note that the input file must be raw corpus, not a preprocessed corpus.
// Trainer only loads the first `input_sentence_size` sentences specified
// with this parameter.
repeated string input = 1;
// Input corpus format:
// "text": one-sentence-per-line text format (default)
// "tsv": sentence <tab> freq
optional string input_format = 7;
// Output model file prefix.
// <model_prefix>.model and <model_prefix>.vocab are generated.
optional string model_prefix = 2;
// Model type. only have UNIGRAM now.
enum ModelType {
UNIGRAM = 1; // Unigram language model with dynamic algorithm
BPE = 2; // Byte Pair Encoding
WORD = 3; // Delimitered by whitespace.
CHAR = 4; // tokenizes into character sequence
}
optional ModelType model_type = 3 [default = UNIGRAM];
// Vocabulary size. 8k is the default size.
optional int32 vocab_size = 4 [default = 8000];
// List of the languages this model can accept.
// Since the model is language-agnostic, this field is used as a reference.
repeated string accept_language = 5;
// Size of self-test samples, which are encoded in the model file.
optional int32 self_test_sample_size = 6 [default = 0];
// Whether to use DP version of sentencepiece. Use it with TSV input format
// (requires precomputed word tab counts to work).
optional bool enable_differential_privacy = 50 [default = false];
// Set these parameters if you need DP version of sentencepiece.
// std of noise to add.
optional float differential_privacy_noise_level = 51 [default = 0.0];
// Clipping threshold to apply after adding noise. All the words with
// frequency less than this value are dropped.
optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
///////////////////////////////////////////////////////////////////
// Training parameters.
//
// Uses characters which cover the corpus with the ratio of `chars_coverage`.
// This parameter determines the set of basic Alphabet of sentence piece.
// 1.0 - `chars_coverage` characters are treated as UNK.
// See also required_chars field.
optional float character_coverage = 10 [default = 0.9995];
// Maximum size of sentences the trainer loads from `input` parameter.
// Trainer simply loads the `input` files in sequence.
// It is better to shuffle the input corpus randomly.
optional uint64 input_sentence_size = 11 [default = 0];
optional bool shuffle_input_sentence = 19 [default = true];
// Maximum size of sentences to make seed sentence pieces.
// Extended suffix array is constructed to extract frequent
// sub-strings from the corpus. This uses 20N working space,
// where N is the size of corpus.
optional int32 mining_sentence_size = 12 [deprecated = true];
// Maximum size of sentences to train sentence pieces.
optional int32 training_sentence_size = 13 [deprecated = true];
// The size of seed sentencepieces.
// `seed_sentencepiece_size` must be larger than `vocab_size`.
optional int32 seed_sentencepiece_size = 14 [default = 1000000];
// In every EM sub-iterations, keeps top
// `shrinking_factor` * `current sentencepieces size` with respect to
// the loss of the sentence piece. This value should be smaller than 1.0.
optional float shrinking_factor = 15 [default = 0.75];
// The maximum sentence length in byte. The sentences with the length
// larger than `max_sentence_length` is simply ignored.
// Longer input tends to bring the following risks:
// * Overflow during EM training (unigram language model only)
// * Performance drop because of O(n log n) cost in BPE.
optional int32 max_sentence_length = 18 [default = 4192];
// Number of threads in the training.
optional int32 num_threads = 16 [default = 16];
// Number of EM sub iterations.
optional int32 num_sub_iterations = 17 [default = 2];
///////////////////////////////////////////////////////////////////
// SentencePiece parameters which control the shapes of sentence piece.
//
// Maximum length of sentencepiece.
optional int32 max_sentencepiece_length = 20 [default = 16];
// Uses Unicode script to split sentence pieces.
// When `split_by_unicode_script` is true, we do not allow sentence piece to
// include multiple Unicode scripts, e.g. "F1" is not a valid piece.
// Exception: CJ characters (Hiragana/Katakana/Han) are all handled
// as one script type, since Japanese word can consist of multiple scripts.
// This exception is always applied regardless of the accept-language
// parameter.
optional bool split_by_unicode_script = 21 [default = true];
// When `split_by_number` is true, put a boundary between number and
// non-number transition. If we want to treat "F1" is one token, set this flag
// to be false.
optional bool split_by_number = 23 [default = true];
// Use a white space to split sentence pieces.
// When `split_by_whitespace` is false, we may have the piece containing
// a white space in the middle. e.g., "in_the".
optional bool split_by_whitespace = 22 [default = true];
// Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
// hello_. When `treat_whitespace_as_suffix` is true,
// NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
// of sentence.
optional bool treat_whitespace_as_suffix = 24 [default = false];
// Allows pieces that only contain whitespaces instead of appearing only as
// prefix or suffix of other pieces.
optional bool allow_whitespace_only_pieces = 26 [default = false];
// Split all digits (0-9) into separate pieces.
optional bool split_digits = 25 [default = false];
// Defines the pre-tokenization delimiter.
// When specified, no pieces crossing this delimiter is not included
// in the vocab. Then the delimiter string is virtually ignored
// during the training. This field can allows constraints on the vocabulary
// selection. Note that this field is available on unigram mode.
optional string pretokenization_delimiter = 53 [ default = ""];
///////////////////////////////////////////////////////////////////
// Vocabulary management
//
// Defines control symbols used as an indicator to
// change the behavior of the decoder. <s> and </s> are pre-defined.
// We can use this field to encode various meta information,
// including language indicator in multilingual model.
// These symbols are not visible to users, but visible to
// the decoder. Note that when the input sentence contains control symbols,
// they are not treated as one token, but segmented into normal pieces.
// Control symbols must be inserted independently from the segmentation.
repeated string control_symbols = 30;
// Defines user defined symbols.
// These symbols are added with extremely high score
// so they are always treated as one unique symbol in any context.
// Typical usage of user_defined_symbols is placeholder for named entities.
repeated string user_defined_symbols = 31;
// Defines required characters. Each UTF8 character in this string is included
// in the character set regardless of character_coverage value. Unlike
// user_defined_symbols, these characters have scores based on the frequency
// on input sentences, and the model can form subwords using characters
// in this field.
optional string required_chars = 36;
// Decomposes unknown pieces into UTF-8 bytes.
optional bool byte_fallback = 35 [default = false];
// When creating the vocabulary file, defines whether or not to additionally
// output the score for each piece.
optional bool vocabulary_output_piece_score = 32 [default = true];
// `vocab_size` is treated as hard limit. Crash if
// the model can not produce the vocab of size `vocab_size`,
// When `hard_vocab_limit` is false, vocab_size is treated
// as soft limit. Note that when model_type=char,
// always assumes hard_vocab_limit = false.
optional bool hard_vocab_limit = 33 [default = true];
// use all symbols for vocab extraction. This flag is valid
// if model type is either CHAR or WORD
optional bool use_all_vocab = 34 [default = false];
///////////////////////////////////////////////////////////////////
// Reserved special meta tokens.
// * -1 is not used.
// * unk_id must not be -1.
// Id must starts with 0 and be contigous.
optional int32 unk_id = 40 [default = 0]; // <unk>
optional int32 bos_id = 41 [default = 1]; // <s>
optional int32 eos_id = 42 [default = 2]; // </s>
optional int32 pad_id = 43 [default = -1]; // <pad> (padding)
optional string unk_piece = 45 [default = "<unk>"];
optional string bos_piece = 46 [default = "<s>"];
optional string eos_piece = 47 [default = "</s>"];
optional string pad_piece = 48 [default = "<pad>"];
// Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
// since this character can be useful both for user and
// developer. We can easily figure out that <unk> is emitted.
optional string unk_surface = 44 [default = " \xE2\x81\x87 "];
// Increase bit depth to allow unigram model training on large
// (>10M sentences) corpora. A Side-effect of enabling this flag
// is increased memory usage.
optional bool train_extremely_large_corpus = 49 [default = false];
// Path to a seed sentencepieces file, with one tab-separated
// seed sentencepiece <tab> frequency per line.
optional string seed_sentencepieces_file = 54 [default = ""];
// Customized extensions: the range of field numbers
// are open to third-party extensions.
extensions 200 to max;
}
// NormalizerSpec encodes a various parameters for string normalizaiton
message NormalizerSpec {
// name of normalization rule.
optional string name = 1;
// Pre-compiled normalization rule created by
// Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
// Usually this field is set by Builder::GetNormalizerSpec() method.
optional bytes precompiled_charsmap = 2;
// Adds dummy whitespace at the beginning of text in order to
// treat "world" in "world" and "hello world" in the same way.
optional bool add_dummy_prefix = 3 [default = true];
// Removes leading, trailing, and duplicate internal whitespace.
optional bool remove_extra_whitespaces = 4 [default = true];
// Replaces whitespace with meta symbol.
// This field must be true to train sentence piece model.
optional bool escape_whitespaces = 5 [default = true];
// Custom normalization rule file in TSV format.
// https://github.com/google/sentencepiece/blob/master/doc/normalization.md
// This field is only used in SentencePieceTrainer::Train() method, which
// compiles the rule into the binary rule stored in `precompiled_charsmap`.
optional string normalization_rule_tsv = 6;
// Customized extensions: the range of field numbers
// are open to third-party extensions.
extensions 200 to max;
}
// Proto to store samples for self-testing.
message SelfTestData {
message Sample {
optional string input = 1;
optional string expected = 2;
}
repeated Sample samples = 1;
// Customized extensions: the range of field numbers
// are open to third-party extensions.
extensions 200 to max;
}
// ModelProto stores model parameters.
// SentencePieceProcessor is supposed to be self-contained.
// All settings/parameters which may change the behavior must be encoded
// in ModelProto.
message ModelProto {
message SentencePiece {
enum Type {
NORMAL = 1; // normal symbol
UNKNOWN = 2; // unknown symbol. only <unk> for now.
CONTROL = 3; // control symbols. </s>, <s>, <2ja> etc.
USER_DEFINED = 4; // user defined symbols.
// Typical usage of USER_DEFINED symbol
// is placeholder.
BYTE = 6; // byte symbols. Used when `byte_fallback` is true.
UNUSED = 5; // this piece is not used.
}
optional string piece = 1; // piece must not be empty.
optional float score = 2;
optional Type type = 3 [default = NORMAL];
// Customized extensions: the range of field numbers
// are open to third-party extensions.
extensions 200 to max;
}
// Sentence pieces with scores.
repeated SentencePiece pieces = 1;
// Spec used to generate this model file.
optional TrainerSpec trainer_spec = 2;
// Spec for text normalization.
optional NormalizerSpec normalizer_spec = 3;
// Stores sample input and its expected segmentation to verify the model.
optional SelfTestData self_test_data = 4;
// Spec for text de-normalization.
optional NormalizerSpec denormalizer_spec = 5;
// Customized extensions: the range of field numbers
// are open to third-party extensions.
extensions 200 to max;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment