OpenDAS / ollama · Commit 2c017ca4

Convert Safetensors to an Ollama model (#2824)

Commit 2c017ca4 (unverified), authored Mar 06, 2024 by Patrick Devine; committed via GitHub on Mar 06, 2024.
Parent: 0ded7fdc

Showing 9 changed files with 3083 additions and 153 deletions (+3083 −153).
Changed files:
  cmd/cmd.go                                        +89    −8
  convert/convert.go                                +331   −0    (new)
  convert/sentencepiece/sentencepiece_model.pb.go   +1497  −0    (new)
  convert/sentencepiece_model.proto                 +333   −0    (new)
  go.mod                                            +22    −3
  go.sum                                            +148   −2
  llm/ggml.go                                       +2     −2
  llm/gguf.go                                       +574   −137
  server/images.go                                  +87    −1
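Taken together, the changes below add a safetensors-to-GGUF conversion path to model creation: the CLI (cmd/cmd.go) now accepts a directory as a Modelfile FROM target, packages its model-*.safetensors shards plus config.json, added_tokens.json, and tokenizer.model into an uncompressed zip, and uploads that archive as a blob; the server (server/images.go) unpacks the archive and drives the new convert package, which maps Hugging Face tensor names and the SentencePiece vocabulary into a GGUF file the existing llm code can load. At this point only MistralForCausalLM checkpoints are accepted.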
cmd/cmd.go

 package cmd

 import (
+	"archive/zip"
 	"bytes"
 	"context"
 	"crypto/ed25519"
 	...

@@ -87,22 +88,82 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			path = filepath.Join(filepath.Dir(filename), path)
 		}

-		bin, err := os.Open(path)
+		fi, err := os.Stat(path)
 		if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
 			continue
 		} else if err != nil {
 			return err
 		}

-		defer bin.Close()
-
-		hash := sha256.New()
-		if _, err := io.Copy(hash, bin); err != nil {
-			return err
-		}
-
-		bin.Seek(0, io.SeekStart)
-
-		digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
-		if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
-			return err
-		}
+		// TODO make this work w/ adapters
+		if fi.IsDir() {
+			tf, err := os.CreateTemp("", "ollama-tf")
+			if err != nil {
+				return err
+			}
+			defer os.RemoveAll(tf.Name())
+
+			zf := zip.NewWriter(tf)
+
+			files, err := filepath.Glob(filepath.Join(path, "model-*.safetensors"))
+			if err != nil {
+				return err
+			}
+			if len(files) == 0 {
+				return fmt.Errorf("no safetensors files were found in '%s'", path)
+			}
+
+			// add the safetensor config file + tokenizer
+			files = append(files, filepath.Join(path, "config.json"))
+			files = append(files, filepath.Join(path, "added_tokens.json"))
+			files = append(files, filepath.Join(path, "tokenizer.model"))
+
+			for _, fn := range files {
+				f, err := os.Open(fn)
+				if os.IsNotExist(err) && strings.HasSuffix(fn, "added_tokens.json") {
+					continue
+				} else if err != nil {
+					return err
+				}
+
+				fi, err := f.Stat()
+				if err != nil {
+					return err
+				}
+
+				h, err := zip.FileInfoHeader(fi)
+				if err != nil {
+					return err
+				}
+
+				h.Name = filepath.Base(fn)
+				h.Method = zip.Store
+
+				w, err := zf.CreateHeader(h)
+				if err != nil {
+					return err
+				}
+
+				_, err = io.Copy(w, f)
+				if err != nil {
+					return err
+				}
+			}
+
+			if err := zf.Close(); err != nil {
+				return err
+			}
+
+			if err := tf.Close(); err != nil {
+				return err
+			}
+			path = tf.Name()
+		}
+
+		digest, err := createBlob(cmd, client, path)
+		if err != nil {
+			return err
+		}
 	...

@@ -141,6 +202,26 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }

+func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
+	bin, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer bin.Close()
+
+	hash := sha256.New()
+	if _, err := io.Copy(hash, bin); err != nil {
+		return "", err
+	}
+	bin.Seek(0, io.SeekStart)
+
+	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
+	if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
+		return "", err
+	}
+
+	return digest, nil
+}
+
 func RunHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 	...
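For reference, the blob digest the CLI sends is simply a lowercase hex SHA-256 of the uploaded file prefixed with "sha256:". A minimal standalone sketch (a hypothetical helper, not part of this commit) that reproduces the same format:

package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"log"
	"os"
)

// fileDigest computes the "sha256:<hex>" string in the same way createBlob
// does before uploading a blob to the Ollama server.
func fileDigest(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return fmt.Sprintf("sha256:%x", h.Sum(nil)), nil
}

func main() {
	d, err := fileDigest("model.zip") // hypothetical path to the packaged safetensors archive
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(d)
}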
convert/convert.go (new file, mode 100644)

package convert

import (
	"bytes"
	"cmp"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"os"
	"path/filepath"
	"regexp"
	"slices"

	"github.com/mitchellh/mapstructure"
	"google.golang.org/protobuf/proto"

	"github.com/jmorganca/ollama/convert/sentencepiece"
	"github.com/jmorganca/ollama/llm"
)

type Params struct {
	Architectures    []string `json:"architectures"`
	VocabSize        int      `json:"vocab_size"`
	HiddenSize       int      `json:"hidden_size"`       // n_embd
	HiddenLayers     int      `json:"num_hidden_layers"` // n_layer
	ContextSize      int      `json:"max_position_embeddings"`
	IntermediateSize int      `json:"intermediate_size"`
	AttentionHeads   int      `json:"num_attention_heads"` // n_head
	KeyValHeads      int      `json:"num_key_value_heads"`
	NormEPS          float64  `json:"rms_norm_eps"`
	RopeFreqBase     float64  `json:"rope_theta"`
	BoSTokenID       int      `json:"bos_token_id"`
	EoSTokenID       int      `json:"eos_token_id"`
}

type MetaData struct {
	Type    string `mapstructure:"dtype"`
	Shape   []int  `mapstructure:"shape"`
	Offsets []int  `mapstructure:"data_offsets"`
}

func ReadSafeTensors(fn string, offset uint64) ([]llm.Tensor, uint64, error) {
	f, err := os.Open(fn)
	if err != nil {
		return []llm.Tensor{}, 0, err
	}
	defer f.Close()

	var jsonSize uint64
	binary.Read(f, binary.LittleEndian, &jsonSize)

	buf := make([]byte, jsonSize)
	_, err = io.ReadFull(f, buf)
	if err != nil {
		return []llm.Tensor{}, 0, err
	}

	d := json.NewDecoder(bytes.NewBuffer(buf))
	d.UseNumber()
	var parsed map[string]interface{}
	if err = d.Decode(&parsed); err != nil {
		return []llm.Tensor{}, 0, err
	}

	var keys []string
	for k := range parsed {
		keys = append(keys, k)
	}
	slices.Sort(keys)

	slog.Info("converting layers")

	var tensors []llm.Tensor
	for _, k := range keys {
		vals := parsed[k].(map[string]interface{})
		var data MetaData
		if err = mapstructure.Decode(vals, &data); err != nil {
			return []llm.Tensor{}, 0, err
		}

		var size uint64
		var kind uint32
		switch len(data.Shape) {
		case 0:
			// metadata
			continue
		case 1:
			// convert to float32
			kind = 0
			size = uint64(data.Shape[0] * 4)
		case 2:
			// convert to float16
			kind = 1
			size = uint64(data.Shape[0] * data.Shape[1] * 2)
		}

		ggufName, err := GetTensorName(k)
		if err != nil {
			slog.Error("%v", err)
			return []llm.Tensor{}, 0, err
		}

		shape := [4]uint64{0, 0, 0, 0}
		for cnt, s := range data.Shape {
			shape[cnt] = uint64(s)
		}

		t := llm.Tensor{
			Name:          ggufName,
			Kind:          kind,
			Offset:        offset,
			Shape:         shape,
			FileName:      fn,
			OffsetPadding: 8 + jsonSize,
			FileOffsets:   []uint64{uint64(data.Offsets[0]), uint64(data.Offsets[1])},
		}
		slog.Debug(fmt.Sprintf("%v", t))
		tensors = append(tensors, t)
		offset += size
	}
	return tensors, offset, nil
}

func GetSafeTensors(dirpath string) ([]llm.Tensor, error) {
	var tensors []llm.Tensor
	files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
	if err != nil {
		return []llm.Tensor{}, err
	}

	var offset uint64
	for _, f := range files {
		var t []llm.Tensor
		var err error
		t, offset, err = ReadSafeTensors(f, offset)
		if err != nil {
			slog.Error("%v", err)
			return []llm.Tensor{}, err
		}
		tensors = append(tensors, t...)
	}
	return tensors, nil
}

func GetParams(dirpath string) (*Params, error) {
	f, err := os.Open(filepath.Join(dirpath, "config.json"))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var params Params

	d := json.NewDecoder(f)
	err = d.Decode(&params)
	if err != nil {
		return nil, err
	}

	return &params, nil
}

// Details on gguf's tokenizer can be found at:
// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
type Vocab struct {
	Tokens []string
	Scores []float32
	Types  []int32
}

func LoadTokens(dirpath string) (*Vocab, error) {
	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
	if err != nil {
		return nil, err
	}

	// To regenerate sentencepiece from the protobufs use:
	// protoc -I=./ --go_out=./ sentencepiece_model.proto
	modelProto := &sentencepiece.ModelProto{}
	if err := proto.Unmarshal(in, modelProto); err != nil {
		return nil, err
	}

	v := &Vocab{
		Tokens: make([]string, 0),
		Scores: make([]float32, 0),
		Types:  make([]int32, 0),
	}

	pieces := modelProto.GetPieces()
	for _, p := range pieces {
		v.Tokens = append(v.Tokens, p.GetPiece())
		v.Scores = append(v.Scores, p.GetScore())
		t := p.GetType()
		v.Types = append(v.Types, int32(t))
	}

	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))

	// add any additional tokens
	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
	if os.IsNotExist(err) {
		return v, nil
	} else if err != nil {
		return nil, err
	}

	slog.Info("reading user defined tokens")

	var extraTokenData map[string]int
	if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
		return nil, err
	}

	type token struct {
		key string
		pos int
	}

	extraTokens := make([]token, 0)
	for k, id := range extraTokenData {
		extraTokens = append(extraTokens, token{k, id})
	}

	slices.SortFunc(extraTokens, func(a, b token) int {
		return cmp.Compare(a.pos, b.pos)
	})

	numToks := len(v.Tokens)

	for cnt, t := range extraTokens {
		// the token id should match the specific index for the total number of tokens
		if t.pos != cnt+numToks {
			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
		}
		v.Tokens = append(v.Tokens, t.key)
		v.Scores = append(v.Scores, -1000.0)
		v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
	}

	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))

	return v, nil
}

func GetTensorName(n string) (string, error) {
	tMap := map[string]string{
		"model.embed_tokens.weight":                           "token_embd.weight",
		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
		"lm_head.weight":                                      "output.weight",
		"model.norm.weight":                                   "output_norm.weight",
	}

	v, ok := tMap[n]
	if ok {
		return v, nil
	}

	// quick hack to rename the layers to gguf format
	for k, v := range tMap {
		re := regexp.MustCompile(k)
		newName := re.ReplaceAllString(n, v)
		if newName != n {
			return newName, nil
		}
	}

	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}

func WriteGGUF(name string, tensors []llm.Tensor, params *Params, vocab *Vocab) (string, error) {
	c := llm.ContainerGGUF{
		ByteOrder: binary.LittleEndian,
	}

	m := llm.NewGGUFModel(&c)
	m.Tensors = tensors
	m.KV["general.architecture"] = "llama"
	m.KV["general.name"] = name
	m.KV["llama.context_length"] = uint32(params.ContextSize)
	m.KV["llama.embedding_length"] = uint32(params.HiddenSize)
	m.KV["llama.block_count"] = uint32(params.HiddenLayers)
	m.KV["llama.feed_forward_length"] = uint32(params.IntermediateSize)
	m.KV["llama.rope.dimension_count"] = uint32(128)
	m.KV["llama.attention.head_count"] = uint32(params.AttentionHeads)
	m.KV["llama.attention.head_count_kv"] = uint32(params.KeyValHeads)
	m.KV["llama.attention.layer_norm_rms_epsilon"] = float32(params.NormEPS)
	m.KV["llama.rope.freq_base"] = float32(params.RopeFreqBase)
	m.KV["general.file_type"] = uint32(1)
	m.KV["tokenizer.ggml.model"] = "llama"

	m.KV["tokenizer.ggml.tokens"] = vocab.Tokens
	m.KV["tokenizer.ggml.scores"] = vocab.Scores
	m.KV["tokenizer.ggml.token_type"] = vocab.Types

	m.KV["tokenizer.ggml.bos_token_id"] = uint32(params.BoSTokenID)
	m.KV["tokenizer.ggml.eos_token_id"] = uint32(params.EoSTokenID)
	m.KV["tokenizer.ggml.unknown_token_id"] = uint32(0)
	m.KV["tokenizer.ggml.add_bos_token"] = true
	m.KV["tokenizer.ggml.add_eos_token"] = false

	// llamacpp sets the chat template, however we don't need to set it since we pass it in through a layer
	// m.KV["tokenizer.chat_template"] = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" // XXX removeme

	c.V3.NumTensor = uint64(len(tensors))
	c.V3.NumKV = uint64(len(m.KV))

	f, err := os.CreateTemp("", "ollama-gguf")
	if err != nil {
		return "", err
	}
	defer f.Close()

	err = m.Encode(f)
	if err != nil {
		return "", err
	}

	return f.Name(), nil
}
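The exported pieces above compose into a small pipeline: GetParams reads config.json, GetSafeTensors indexes the model-*.safetensors shards (each shard begins with an 8-byte little-endian header size followed by a JSON map of tensor name to dtype/shape/data_offsets), LoadTokens rebuilds the SentencePiece vocabulary plus any added_tokens.json, and WriteGGUF emits a temporary GGUF file. A minimal sketch of driving the package directly, assuming an already-unpacked safetensors checkpoint on disk (this mirrors what server/images.go does further below; the directory path is hypothetical):

package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/convert"
)

func main() {
	dir := "/path/to/mistral-7b" // hypothetical dir containing config.json, tokenizer.model, model-*.safetensors

	params, err := convert.GetParams(dir) // hyperparameters from config.json
	if err != nil {
		log.Fatal(err)
	}

	tensors, err := convert.GetSafeTensors(dir) // tensor metadata from the safetensors headers
	if err != nil {
		log.Fatal(err)
	}

	vocab, err := convert.LoadTokens(dir) // SentencePiece vocab, plus added_tokens.json if present
	if err != nil {
		log.Fatal(err)
	}

	gguf, err := convert.WriteGGUF("mistral", tensors, params, vocab) // returns the temp .gguf path
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("wrote", gguf)
}

Tensor renaming is regex-driven, so for example GetTensorName("model.layers.0.self_attn.q_proj.weight") resolves to "blk.0.attn_q.weight".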
convert/sentencepiece/sentencepiece_model.pb.go (new file, mode 100644)

(Diff collapsed: generated protobuf bindings, +1497 lines.)
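As noted in convert.go, these bindings can be regenerated from the .proto definition below with `protoc -I=./ --go_out=./ sentencepiece_model.proto`.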
convert/sentencepiece_model.proto (new file, mode 100644)

// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.!

syntax = "proto2";

// TODO(taku): Needs to use LITE RUNTIME in OSS release.
option optimize_for = LITE_RUNTIME;
option go_package = "./sentencepiece";

package sentencepiece;

// TrainerSpec encodes a various parameters for SentencePiece training.
// Next id: 55
message TrainerSpec {
  ///////////////////////////////////////////////////////////////////
  // General parameters
  //
  // Input corpus files.
  //  Trainer accepts the following two formats:
  //  A) Monolingual: plain text, one sentence per line.
  //  B) Bilingual:   TSV, source sentence <tab> target sentence
  //  When bilingual data is passed, shared vocabulary model is built.
  //  Note that the input file must be raw corpus, not a preprocessed corpus.
  //  Trainer only loads the first `input_sentence_size` sentences specified
  //  with this parameter.
  repeated string input = 1;

  // Input corpus format:
  // "text": one-sentence-per-line text format (default)
  // "tsv":  sentence <tab> freq
  optional string input_format = 7;

  // Output model file prefix.
  // <model_prefix>.model and <model_prefix>.vocab are generated.
  optional string model_prefix = 2;

  // Model type. only have UNIGRAM now.
  enum ModelType {
    UNIGRAM = 1;  // Unigram language model with dynamic algorithm
    BPE = 2;      // Byte Pair Encoding
    WORD = 3;     // Delimitered by whitespace.
    CHAR = 4;     // tokenizes into character sequence
  }
  optional ModelType model_type = 3 [default = UNIGRAM];

  // Vocabulary size. 8k is the default size.
  optional int32 vocab_size = 4 [default = 8000];

  // List of the languages this model can accept.
  // Since the model is language-agnostic, this field is used as a reference.
  repeated string accept_language = 5;

  // Size of self-test samples, which are encoded in the model file.
  optional int32 self_test_sample_size = 6 [default = 0];

  // Whether to use DP version of sentencepiece. Use it with TSV input format
  // (requires precomputed word tab counts to work).
  optional bool enable_differential_privacy = 50 [default = false];
  // Set these parameters if you need DP version of sentencepiece.
  // std of noise to add.
  optional float differential_privacy_noise_level = 51 [default = 0.0];
  // Clipping threshold to apply after adding noise. All the words with
  // frequency less than this value are dropped.
  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];

  ///////////////////////////////////////////////////////////////////
  // Training parameters.
  //
  // Uses characters which cover the corpus with the ratio of `chars_coverage`.
  // This parameter determines the set of basic Alphabet of sentence piece.
  // 1.0 - `chars_coverage` characters are treated as UNK.
  // See also required_chars field.
  optional float character_coverage = 10 [default = 0.9995];

  // Maximum size of sentences the trainer loads from `input` parameter.
  // Trainer simply loads the `input` files in sequence.
  // It is better to shuffle the input corpus randomly.
  optional uint64 input_sentence_size = 11 [default = 0];
  optional bool shuffle_input_sentence = 19 [default = true];

  // Maximum size of sentences to make seed sentence pieces.
  // Extended suffix array is constructed to extract frequent
  // sub-strings from the corpus. This uses 20N working space,
  // where N is the size of corpus.
  optional int32 mining_sentence_size = 12 [deprecated = true];

  // Maximum size of sentences to train sentence pieces.
  optional int32 training_sentence_size = 13 [deprecated = true];

  // The size of seed sentencepieces.
  // `seed_sentencepiece_size` must be larger than `vocab_size`.
  optional int32 seed_sentencepiece_size = 14 [default = 1000000];

  // In every EM sub-iterations, keeps top
  // `shrinking_factor` * `current sentencepieces size` with respect to
  // the loss of the sentence piece. This value should be smaller than 1.0.
  optional float shrinking_factor = 15 [default = 0.75];

  // The maximum sentence length in byte. The sentences with the length
  // larger than `max_sentence_length` is simply ignored.
  // Longer input tends to bring the following risks:
  //  * Overflow during EM training (unigram language model only)
  //  * Performance drop because of O(n log n) cost in BPE.
  optional int32 max_sentence_length = 18 [default = 4192];

  // Number of threads in the training.
  optional int32 num_threads = 16 [default = 16];

  // Number of EM sub iterations.
  optional int32 num_sub_iterations = 17 [default = 2];

  ///////////////////////////////////////////////////////////////////
  // SentencePiece parameters which control the shapes of sentence piece.
  //
  // Maximum length of sentencepiece.
  optional int32 max_sentencepiece_length = 20 [default = 16];

  // Uses Unicode script to split sentence pieces.
  // When `split_by_unicode_script` is true, we do not allow sentence piece to
  // include multiple Unicode scripts, e.g. "F1" is not a valid piece.
  // Exception: CJ characters (Hiragana/Katakana/Han) are all handled
  // as one script type, since Japanese word can consist of multiple scripts.
  // This exception is always applied regardless of the accept-language
  // parameter.
  optional bool split_by_unicode_script = 21 [default = true];

  // When `split_by_number` is true, put a boundary between number and
  // non-number transition. If we want to treat "F1" is one token, set this flag
  // to be false.
  optional bool split_by_number = 23 [default = true];

  // Use a white space to split sentence pieces.
  // When `split_by_whitespace` is false, we may have the piece containing
  // a white space in the middle. e.g., "in_the".
  optional bool split_by_whitespace = 22 [default = true];

  // Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
  // hello_. When `treat_whitespace_as_suffix` is true,
  // NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
  // of sentence.
  optional bool treat_whitespace_as_suffix = 24 [default = false];

  // Allows pieces that only contain whitespaces instead of appearing only as
  // prefix or suffix of other pieces.
  optional bool allow_whitespace_only_pieces = 26 [default = false];

  // Split all digits (0-9) into separate pieces.
  optional bool split_digits = 25 [default = false];

  // Defines the pre-tokenization delimiter.
  // When specified, no pieces crossing this delimiter is not included
  // in the vocab. Then the delimiter string is virtually ignored
  // during the training. This field can allows constraints on the vocabulary
  // selection. Note that this field is available on unigram mode.
  optional string pretokenization_delimiter = 53 [default = ""];

  ///////////////////////////////////////////////////////////////////
  // Vocabulary management
  //
  // Defines control symbols used as an indicator to
  // change the behavior of the decoder. <s> and </s> are pre-defined.
  // We can use this field to encode various meta information,
  // including language indicator in multilingual model.
  // These symbols are not visible to users, but visible to
  // the decoder. Note that when the input sentence contains control symbols,
  // they are not treated as one token, but segmented into normal pieces.
  // Control symbols must be inserted independently from the segmentation.
  repeated string control_symbols = 30;

  // Defines user defined symbols.
  // These symbols are added with extremely high score
  // so they are always treated as one unique symbol in any context.
  // Typical usage of user_defined_symbols is placeholder for named entities.
  repeated string user_defined_symbols = 31;

  // Defines required characters. Each UTF8 character in this string is included
  // in the character set regardless of character_coverage value. Unlike
  // user_defined_symbols, these characters have scores based on the frequency
  // on input sentences, and the model can form subwords using characters
  // in this field.
  optional string required_chars = 36;

  // Decomposes unknown pieces into UTF-8 bytes.
  optional bool byte_fallback = 35 [default = false];

  // When creating the vocabulary file, defines whether or not to additionally
  // output the score for each piece.
  optional bool vocabulary_output_piece_score = 32 [default = true];

  // `vocab_size` is treated as hard limit. Crash if
  // the model can not produce the vocab of size `vocab_size`,
  // When `hard_vocab_limit` is false, vocab_size is treated
  // as soft limit. Note that when model_type=char,
  // always assumes hard_vocab_limit = false.
  optional bool hard_vocab_limit = 33 [default = true];

  // use all symbols for vocab extraction. This flag is valid
  // if model type is either CHAR or WORD
  optional bool use_all_vocab = 34 [default = false];

  ///////////////////////////////////////////////////////////////////
  // Reserved special meta tokens.
  // * -1 is not used.
  // * unk_id must not be -1.
  // Id must starts with 0 and be contigous.
  optional int32 unk_id = 40 [default = 0];   // <unk>
  optional int32 bos_id = 41 [default = 1];   // <s>
  optional int32 eos_id = 42 [default = 2];   // </s>
  optional int32 pad_id = 43 [default = -1];  // <pad> (padding)
  optional string unk_piece = 45 [default = "<unk>"];
  optional string bos_piece = 46 [default = "<s>"];
  optional string eos_piece = 47 [default = "</s>"];
  optional string pad_piece = 48 [default = "<pad>"];

  // Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
  // since this character can be useful both for user and
  // developer. We can easily figure out that <unk> is emitted.
  optional string unk_surface = 44 [default = " \xE2\x81\x87 "];

  // Increase bit depth to allow unigram model training on large
  // (>10M sentences) corpora. A Side-effect of enabling this flag
  // is increased memory usage.
  optional bool train_extremely_large_corpus = 49 [default = false];

  // Path to a seed sentencepieces file, with one tab-separated
  //  seed sentencepiece <tab> frequency per line.
  optional string seed_sentencepieces_file = 54 [default = ""];

  // Customized extensions: the range of field numbers
  // are open to third-party extensions.
  extensions 200 to max;
}

// NormalizerSpec encodes a various parameters for string normalizaiton
message NormalizerSpec {
  // name of normalization rule.
  optional string name = 1;

  // Pre-compiled normalization rule created by
  // Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
  // Usually this field is set by Builder::GetNormalizerSpec() method.
  optional bytes precompiled_charsmap = 2;

  // Adds dummy whitespace at the beginning of text in order to
  // treat "world" in "world" and "hello world" in the same way.
  optional bool add_dummy_prefix = 3 [default = true];

  // Removes leading, trailing, and duplicate internal whitespace.
  optional bool remove_extra_whitespaces = 4 [default = true];

  // Replaces whitespace with meta symbol.
  // This field must be true to train sentence piece model.
  optional bool escape_whitespaces = 5 [default = true];

  // Custom normalization rule file in TSV format.
  // https://github.com/google/sentencepiece/blob/master/doc/normalization.md
  // This field is only used in SentencePieceTrainer::Train() method, which
  // compiles the rule into the binary rule stored in `precompiled_charsmap`.
  optional string normalization_rule_tsv = 6;

  // Customized extensions: the range of field numbers
  // are open to third-party extensions.
  extensions 200 to max;
}

// Proto to store samples for self-testing.
message SelfTestData {
  message Sample {
    optional string input = 1;
    optional string expected = 2;
  }
  repeated Sample samples = 1;

  // Customized extensions: the range of field numbers
  // are open to third-party extensions.
  extensions 200 to max;
}

// ModelProto stores model parameters.
// SentencePieceProcessor is supposed to be self-contained.
// All settings/parameters which may change the behavior must be encoded
// in ModelProto.
message ModelProto {
  message SentencePiece {
    enum Type {
      NORMAL = 1;        // normal symbol
      UNKNOWN = 2;       // unknown symbol. only <unk> for now.
      CONTROL = 3;       // control symbols. </s>, <s>, <2ja> etc.
      USER_DEFINED = 4;  // user defined symbols.
                         // Typical usage of USER_DEFINED symbol
                         // is placeholder.
      BYTE = 6;          // byte symbols. Used when `byte_fallback` is true.
      UNUSED = 5;        // this piece is not used.
    }
    optional string piece = 1;  // piece must not be empty.
    optional float score = 2;
    optional Type type = 3 [default = NORMAL];

    // Customized extensions: the range of field numbers
    // are open to third-party extensions.
    extensions 200 to max;
  }

  // Sentence pieces with scores.
  repeated SentencePiece pieces = 1;

  // Spec used to generate this model file.
  optional TrainerSpec trainer_spec = 2;

  // Spec for text normalization.
  optional NormalizerSpec normalizer_spec = 3;

  // Stores sample input and its expected segmentation to verify the model.
  optional SelfTestData self_test_data = 4;

  // Spec for text de-normalization.
  optional NormalizerSpec denormalizer_spec = 5;

  // Customized extensions: the range of field numbers
  // are open to third-party extensions.
  extensions 200 to max;
}
go.mod

 module github.com/jmorganca/ollama

-go 1.21
+go 1.22
+
+toolchain go1.22.0

 require (
 	github.com/containerd/console v1.0.3
+	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
 	github.com/emirpasic/gods v1.18.1
 	github.com/gin-gonic/gin v1.9.1
+	github.com/golang/protobuf v1.5.0
 	github.com/google/uuid v1.0.0
+	github.com/mitchellh/mapstructure v1.5.0
 	github.com/olekukonko/tablewriter v0.0.5
 	github.com/spf13/cobra v1.7.0
 	github.com/stretchr/testify v1.8.4
+	github.com/x448/float16 v0.8.4
 	golang.org/x/sync v0.3.0
 )

+require github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9
+
 require (
+	github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc // indirect
+	github.com/chewxy/hm v1.0.0 // indirect
+	github.com/chewxy/math32 v1.0.8 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/google/flatbuffers v1.12.0 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
+	github.com/xtgo/set v1.0.0 // indirect
+	go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+	gonum.org/v1/gonum v0.8.2 // indirect
+	gorgonia.org/vecf32 v0.9.0 // indirect
+	gorgonia.org/vecf64 v0.9.0 // indirect
 )

 require (
 	...

@@ -38,7 +58,6 @@ require (
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
 	github.com/pelletier/go-toml/v2 v2.0.8 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	...

@@ -50,6 +69,6 @@ require (
 	golang.org/x/sys v0.13.0
 	golang.org/x/term v0.13.0
 	golang.org/x/text v0.13.0 // indirect
-	google.golang.org/protobuf v1.30.0 // indirect
+	google.golang.org/protobuf v1.30.0
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
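The new direct dependencies line up with the conversion code: google.golang.org/protobuf (promoted from indirect) and github.com/golang/protobuf back the generated SentencePiece bindings, github.com/mitchellh/mapstructure decodes the safetensors JSON header into MetaData, and github.com/d4l3k/go-bfloat16, github.com/x448/float16, and github.com/pdevine/tensor are presumably used by the reworked llm/gguf.go encoder (whose diff is collapsed below) to convert tensor data to float16/float32.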
go.sum

(Diff collapsed: +148 −2.)
llm/ggml.go

@@ -163,9 +163,9 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 	case FILE_MAGIC_GGLA:
 		c = &containerLORA{}
 	case FILE_MAGIC_GGUF_LE:
-		c = &containerGGUF{bo: binary.LittleEndian}
+		c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
 	case FILE_MAGIC_GGUF_BE:
-		c = &containerGGUF{bo: binary.BigEndian}
+		c = &ContainerGGUF{ByteOrder: binary.BigEndian}
 	default:
 		return nil, errors.New("invalid file magic")
 	}
 	...
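The rename from the unexported containerGGUF/bo to the exported ContainerGGUF/ByteOrder lets the new convert package construct a GGUF container directly (see llm.ContainerGGUF{ByteOrder: binary.LittleEndian} in WriteGGUF above).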
llm/gguf.go

(Diff collapsed: +574 −137.)
server/images.go

 package server

 import (
+	"archive/zip"
 	"bytes"
 	"context"
 	"crypto/sha256"
 	...

@@ -23,6 +24,7 @@ import (
 	"golang.org/x/exp/slices"

 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/convert"
 	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/parser"
 	"github.com/jmorganca/ollama/version"
 	...

@@ -316,7 +318,24 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 			c.Args = blobPath
 		}

-		bin, err := os.Open(realpath(modelFileDir, c.Args))
+		pathName := realpath(modelFileDir, c.Args)
+
+		ggufName, err := convertSafetensors(name, pathName)
+		if err != nil {
+			switch {
+			case errors.Is(err, zip.ErrFormat):
+				// it's not a safetensor archive
+			default:
+				return err
+			}
+		}
+
+		if ggufName != "" {
+			pathName = ggufName
+			defer os.RemoveAll(ggufName)
+		}
+
+		bin, err := os.Open(pathName)
 		if err != nil {
 			// not a file on disk so must be a model reference
 			modelpath := ParseModelPath(c.Args)
 	...

@@ -592,6 +611,73 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 	return nil
 }

+func convertSafetensors(name, fn string) (string, error) {
+	r, err := zip.OpenReader(fn)
+	if err != nil {
+		return "", err
+	}
+	defer r.Close()
+
+	tempDir, err := os.MkdirTemp("", "ollama-convert")
+	if err != nil {
+		return "", err
+	}
+	defer os.RemoveAll(tempDir)
+
+	for _, f := range r.File {
+		fpath := filepath.Join(tempDir, f.Name)
+		outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
+		if err != nil {
+			return "", err
+		}
+
+		rc, err := f.Open()
+		if err != nil {
+			return "", err
+		}
+
+		_, err = io.Copy(outFile, rc)
+		if err != nil {
+			return "", err
+		}
+
+		outFile.Close()
+		rc.Close()
+	}
+
+	params, err := convert.GetParams(tempDir)
+	if err != nil {
+		return "", err
+	}
+
+	SupportedArchs := []string{
+		"MistralForCausalLM",
+	}
+
+	for _, arch := range params.Architectures {
+		if !slices.Contains(SupportedArchs, arch) {
+			return "", fmt.Errorf("this safetensors model is not yet supported")
+		}
+	}
+
+	t, err := convert.GetSafeTensors(tempDir)
+	if err != nil {
+		return "", err
+	}
+
+	vocab, err := convert.LoadTokens(tempDir)
+	if err != nil {
+		return "", err
+	}
+
+	fn, err = convert.WriteGGUF(name, t, params, vocab)
+	if err != nil {
+		return "", err
+	}
+
+	return fn, nil
+}
+
 func CopyModel(src, dest string) error {
 	srcModelPath := ParseModelPath(src)
 	srcPath, err := srcModelPath.GetManifestPath()
 	...
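Note the control flow in CreateModel: convertSafetensors is attempted on every FROM target, and a zip.ErrFormat from zip.OpenReader is deliberately swallowed so plain GGUF/bin files keep taking the existing path; only when a GGUF was actually produced is pathName swapped (with the temporary file removed afterwards). The SupportedArchs allowlist limits conversion to MistralForCausalLM for now.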