OpenDAS / ollama — commit 0c819e16 (unverified)

convert safetensor adapters into GGUF (#6327)

Authored Aug 23, 2024 by Patrick Devine; committed via GitHub on Aug 23, 2024.
Parent commit: 7a1e1c1c

Showing 16 changed files with 697 additions and 101 deletions (+697 −101).
cmd/cmd.go                         +6    -0
convert/convert.go                 +97   -14
convert/convert_bert.go            +9    -9
convert/convert_gemma.go           +9    -9
convert/convert_gemma2.go          +6    -6
convert/convert_gemma2_adapter.go  +91   -0
convert/convert_llama.go           +8    -8
convert/convert_llama_adapter.go   +169  -0
convert/convert_mixtral.go         +8    -8
convert/convert_phi3.go            +7    -7
convert/convert_test.go            +238  -24
convert/reader.go                  +2    -0
llm/ggml.go                        +8    -0
server/images.go                   +6    -5
server/model.go                    +30   -8
server/model_test.go               +3    -3
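Taken together, these changes let `ollama create` convert a safetensors LoRA adapter (as produced by, e.g., MLX or PEFT fine-tuning) into a GGUF adapter layer, resolving the adapter's architecture and head counts from the base model. A hypothetical Modelfile that would exercise this path — the base model tag and adapter path are placeholders, not taken from this commit:

# Modelfile (illustrative): FROM names the base model whose KV seeds the
# adapter conversion; ADAPTER points at a directory containing
# adapter_config.json and adapters.safetensors / adapter_model.safetensors.
FROM llama3
ADAPTER /path/to/lora-adapter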
cmd/cmd.go

@@ -204,6 +204,12 @@ func tempZipFiles(path string) (string, error) {
 		// safetensors files might be unresolved git lfs references; skip if they are
 		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
 		files = append(files, st...)
+	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// covers adapters.safetensors
+		files = append(files, st...)
+	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// covers adapter_model.safetensors
+		files = append(files, st...)
 	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
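The comments above note that files tracked by git-lfs may be unresolved pointer files rather than real binaries, which is why the internal `glob` helper filters on sniffed content type. A minimal standalone sketch of that idea using only the standard library — the helper name and exact filtering rules here are assumptions, not ollama's code:

package main

import (
	"fmt"
	"net/http"
	"os"
	"path/filepath"
)

// globWithContentType returns files matching pattern whose sniffed content
// type matches want. An unresolved git-lfs pointer is a small ASCII text
// file, so it sniffs as "text/plain; charset=utf-8" rather than the binary
// type expected for real safetensors or pytorch weights.
func globWithContentType(pattern, want string) ([]string, error) {
	matches, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}

	var out []string
	for _, m := range matches {
		f, err := os.Open(m)
		if err != nil {
			return nil, err
		}
		buf := make([]byte, 512)
		n, _ := f.Read(buf)
		f.Close()

		if http.DetectContentType(buf[:n]) == want {
			out = append(out, m)
		}
	}
	return out, nil
}

func main() {
	st, err := globWithContentType("model*.safetensors", "application/octet-stream")
	if err != nil {
		panic(err)
	}
	fmt.Println("real (non-LFS-pointer) safetensors files:", st)
}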
convert/convert.go

@@ -12,12 +12,22 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type Parameters struct {
+type ModelParameters struct {
 	Architectures []string `json:"architectures"`
 	VocabSize     uint32   `json:"vocab_size"`
 }

-func (Parameters) KV(t *Tokenizer) llm.KV {
+type AdapterParameters struct {
+	Alpha          uint32 `json:"lora_alpha"`
+	LoraLayers     uint32 `json:"lora_layers"`
+	LoraParameters struct {
+		Rank  uint32  `json:"rank"`
+		Alpha float32 `json:"alpha"`
+		Scale float32 `json:"scale"`
+	} `json:"lora_parameters"`
+}
+
+func (ModelParameters) KV(t *Tokenizer) llm.KV {
 	kv := llm.KV{
 		"general.file_type":            uint32(1),
 		"general.quantization_version": uint32(2),

@@ -44,17 +54,40 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (Parameters) specialTokenTypes() []string {
+func (p AdapterParameters) KV() llm.KV {
+	var alpha float32
+	if p.LoraParameters.Alpha == 0 {
+		alpha = float32(p.Alpha)
+	} else {
+		alpha = p.LoraParameters.Alpha
+	}
+
+	kv := llm.KV{
+		"adapter.lora.alpha": alpha,
+		"adapter.type":       "lora",
+		"general.file_type":  uint32(1),
+		"general.type":       "adapter",
+		"general.version":    "v0.2",
+	}
+
+	return kv
+}
+
+func (ModelParameters) specialTokenTypes() []string {
 	return []string{
 		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
 	}
 }

-func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }

-type Converter interface {
+func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+	return llm.WriteGGUF(ws, kv, ts)
+}
+
+type ModelConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.

@@ -73,17 +106,67 @@ type moreParser interface {
 	parseMore(fs.FS) error
 }

+type AdapterConverter interface {
+	// KV maps parameters to LLM key-values
+	KV(llm.KV) llm.KV
+	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
+	Tensors([]Tensor) []llm.Tensor
+	// Replacements returns a list of string pairs to replace in tensor names.
+	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
+	Replacements() []string
+
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+}
+
+func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
+	bts, err := fs.ReadFile(fsys, "adapter_config.json")
+	if err != nil {
+		return err
+	}
+
+	var p AdapterParameters
+	if err := json.Unmarshal(bts, &p); err != nil {
+		return err
+	}
+
+	arch, ok := baseKV["general.architecture"]
+	if !ok {
+		return errors.New("architecture not set for the base model")
+	}
+
+	var conv AdapterConverter
+	switch arch {
+	case "llama":
+		conv = &llamaAdapter{}
+	case "gemma2":
+		conv = &gemma2Adapter{}
+	default:
+		return errors.New("unsupported architecture")
+	}
+
+	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
+	if err != nil {
+		return err
+	}
+
+	if err := json.Unmarshal(bts, conv); err != nil {
+		return err
+	}
+
+	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+}
+
 // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
 // and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func Convert(fsys fs.FS, ws io.WriteSeeker) error {
+func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
 		return err
 	}

-	var p Parameters
+	var p ModelParameters
 	if err := json.Unmarshal(bts, &p); err != nil {
 		return err
 	}

@@ -92,20 +175,20 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		return errors.New("unknown architecture")
 	}

-	var conv Converter
+	var conv ModelConverter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
-		conv = &llama{}
+		conv = &llamaModel{}
 	case "MixtralForCausalLM":
-		conv = &mixtral{}
+		conv = &mixtralModel{}
 	case "GemmaForCausalLM":
-		conv = &gemma{}
+		conv = &gemmaModel{}
 	case "Gemma2ForCausalLM":
-		conv = &gemma2{}
+		conv = &gemma2Model{}
 	case "Phi3ForCausalLM":
-		conv = &phi3{}
+		conv = &phi3Model{}
 	case "BertModel":
-		conv = &bert{}
+		conv = &bertModel{}
 	default:
 		return errors.New("unsupported architecture")
 	}
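ConvertAdapter needs the base model's KV so it can check the architecture and carry over head counts. A minimal sketch of driving it directly, assuming the post-commit convert and llm package APIs; within ollama the server wires this up from the Modelfile instead, and the paths here are placeholders:

package main

import (
	"log"
	"os"

	"github.com/ollama/ollama/convert"
	"github.com/ollama/ollama/llm"
)

func main() {
	// Destination for the converted adapter; the path is a placeholder.
	out, err := os.Create("adapter.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	// In the server this KV comes from decoding the base model's GGUF;
	// here it is stubbed with the keys ConvertAdapter actually reads.
	baseKV := llm.KV{
		"general.architecture":          "llama",
		"llama.attention.head_count":    uint32(32),
		"llama.attention.head_count_kv": uint32(8),
	}

	// The directory must contain adapter_config.json plus safetensors
	// adapter weights; the path is a placeholder.
	if err := convert.ConvertAdapter(os.DirFS("/path/to/adapter"), out, baseKV); err != nil {
		log.Fatal(err)
	}
}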
convert/convert_bert.go

@@ -11,8 +11,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type bert struct {
-	Parameters
+type bertModel struct {
+	ModelParameters
 	NLayers         uint32 `json:"n_layers"`
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayer          uint32 `json:"n_layer"`

@@ -33,11 +33,11 @@ type bert struct {
 }

 var (
-	_ Converter  = (*bert)(nil)
-	_ moreParser = (*bert)(nil)
+	_ ModelConverter = (*bertModel)(nil)
+	_ moreParser     = (*bertModel)(nil)
 )

-func (p *bert) parseMore(fsys fs.FS) error {
+func (p *bertModel) parseMore(fsys fs.FS) error {
 	bts, err := fs.ReadFile(fsys, "modules.json")
 	if err != nil {
 		return err

@@ -85,8 +85,8 @@ func (p *bert) parseMore(fsys fs.FS) error {
 	return nil
 }

-func (p *bert) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *bertModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
 	kv["bert.pooling_type"] = p.PoolingType

@@ -132,7 +132,7 @@ func (p *bert) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
+func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
 		if slices.Contains([]string{

@@ -154,7 +154,7 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }

-func (bert) Replacements() []string {
+func (bertModel) Replacements() []string {
 	return []string{
 		"encoder.layer", "blk",
 		"encoder.layers", "blk",
convert/convert_gemma.go

@@ -9,8 +9,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type gemma struct {
-	Parameters
+type gemmaModel struct {
+	ModelParameters
 	MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
 	HiddenSize            uint32 `json:"hidden_size"`
 	HiddenLayers          uint32 `json:"num_hidden_layers"`

@@ -21,10 +21,10 @@ type gemma struct {
 	HeadDim uint32 `json:"head_dim"`
 }

-var _ Converter = (*gemma)(nil)
+var _ ModelConverter = (*gemmaModel)(nil)

-func (p *gemma) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma"
 	kv["gemma.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma.embedding_length"] = p.HiddenSize

@@ -42,8 +42,8 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
-	out := make([]llm.Tensor, 0, len(ts))
+func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		if strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)

@@ -60,7 +60,7 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }

-func (p *gemma) Replacements() []string {
+func (p *gemmaModel) Replacements() []string {
 	return []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",

@@ -77,7 +77,7 @@ func (p *gemma) Replacements() []string {
 	}
 }

-func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
+func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
 	ones := tensor.Ones(tensor.Float32, int(shape[0]))
convert/convert_gemma2.go

@@ -4,15 +4,15 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type gemma2 struct {
-	gemma
+type gemma2Model struct {
+	gemmaModel
 	SlidingWindow         uint32  `json:"sliding_window"`
 	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
 	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
 }

-func (p *gemma2) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma2"
 	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma2.embedding_length"] = p.HiddenSize

@@ -33,9 +33,9 @@ func (p *gemma2) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (p *gemma2) Replacements() []string {
+func (p *gemma2Model) Replacements() []string {
 	return append(
-		p.gemma.Replacements(),
+		p.gemmaModel.Replacements(),
 		"post_attention_layernorm", "post_attention_norm",
 		"pre_feedforward_layernorm", "ffn_norm",
 		"post_feedforward_layernorm", "post_ffw_norm",
convert/convert_gemma2_adapter.go (new file, mode 100644)

package convert

import (
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type gemma2Adapter struct {
	AdapterParameters
}

var _ AdapterConverter = (*gemma2Adapter)(nil)

func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "gemma2"
	return kv
}

func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
			shape[0], shape[1] = shape[1], shape[0]
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

func (p *gemma2Adapter) Replacements() []string {
	return []string{
		"base_model.model.", "",
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"lora_A.weight", "weight.lora_a",
		"lora_B.weight", "weight.lora_b",
		"lora_a", "weight.lora_a",
		"lora_b", "weight.lora_b",
	}
}

func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	if err := n.T(1, 0); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
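The repack above leans on the pdevine/tensor library to flip a LoRA matrix stored in the opposite orientation. Conceptually it is just a 2-D row-major transpose; a self-contained sketch of the same operation on plain slices, illustrative only and not the code above:

package main

import "fmt"

// transpose converts a rows×cols matrix stored row-major in data into its
// cols×rows transpose, also row-major.
func transpose(data []float32, rows, cols int) []float32 {
	out := make([]float32, len(data))
	for r := 0; r < rows; r++ {
		for c := 0; c < cols; c++ {
			out[c*rows+r] = data[r*cols+c]
		}
	}
	return out
}

func main() {
	// A 2×3 lora_a-style matrix...
	m := []float32{1, 2, 3, 4, 5, 6}
	// ...becomes 3×2 after repacking.
	fmt.Println(transpose(m, 2, 3)) // [1 4 2 5 3 6]
}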
convert/convert_llama.go

@@ -12,8 +12,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type llama struct {
-	Parameters
+type llamaModel struct {
+	ModelParameters
 	NLayers         uint32 `json:"n_layers"`
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayer          uint32 `json:"n_layer"`

@@ -44,10 +44,10 @@ type llama struct {
 	HeadDim uint32 `json:"head_dim"`
 }

-var _ Converter = (*llama)(nil)
+var _ ModelConverter = (*llamaModel)(nil)

-func (p *llama) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *llamaModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize

@@ -120,7 +120,7 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
+func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor

 	if p.RopeScaling.factors != nil {

@@ -149,7 +149,7 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }

-func (p *llama) Replacements() []string {
+func (p *llamaModel) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",

@@ -167,7 +167,7 @@ func (p *llama) Replacements() []string {
 	}
 }

-func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
+func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		dims = append(dims, int(dim))
convert/convert_llama_adapter.go (new file, mode 100644)

package convert

import (
	"cmp"
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type llamaAdapter struct {
	AdapterParameters
	NumAttentionHeads uint32 `json:"num_attention_heads"`
	NumKeyValueHeads  uint32 `json:"num_key_value_heads"`
}

var _ AdapterConverter = (*llamaAdapter)(nil)

func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "llama"
	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]

	p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)

	return kv
}

func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
			shape[0], shape[1] = shape[1], shape[0]
			t.SetRepacker(p.repackAndTranspose)
		} else {
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    shape,
			WriterTo: t,
		})
	}

	return out
}

func (p *llamaAdapter) Replacements() []string {
	return []string{
		"base_model.model.", "",
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"lora_A.weight", "weight.lora_a",
		"lora_B.weight", "weight.lora_b",
		"lora_a", "weight.lora_a",
		"lora_b", "weight.lora_b",
	}
}

func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	var heads uint32
	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
		heads = p.NumAttentionHeads
	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
	} else {
		return data, nil
	}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
		return nil, err
	}

	if err := n.T(0, 2, 1, 3); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}

func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	var heads uint32
	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
		heads = p.NumAttentionHeads
	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
	}

	if heads > 0 {
		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
			return nil, err
		}

		if err := n.T(0, 2, 1, 3); err != nil {
			return nil, err
		}

		if err := n.Reshape(dims...); err != nil {
			return nil, err
		}

		if err := n.Transpose(); err != nil {
			return nil, err
		}
	}

	if err := n.T(1, 0); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
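Both repack paths fall back to the full attention head count when the adapter config omits num_key_value_heads: cmp.Or (Go 1.22+) returns the first of its arguments that is not the zero value, which makes the fallback a one-liner. A tiny standalone illustration:

package main

import (
	"cmp"
	"fmt"
)

func main() {
	var numKVHeads uint32 = 0 // missing from the adapter config
	var numHeads uint32 = 32

	// cmp.Or returns its first non-zero argument.
	fmt.Println(cmp.Or(numKVHeads, numHeads)) // 32

	numKVHeads = 8
	fmt.Println(cmp.Or(numKVHeads, numHeads)) // 8
}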
convert/convert_mixtral.go

@@ -9,14 +9,14 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type mixtral struct {
-	llama
+type mixtralModel struct {
+	llamaModel
 	NumLocalExperts    uint32 `json:"num_local_experts"`
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }

-func (p *mixtral) KV(t *Tokenizer) llm.KV {
-	kv := p.llama.KV(t)
+func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
+	kv := p.llamaModel.KV(t)

 	if p.NumLocalExperts > 0 {
 		kv["llama.expert_count"] = p.NumLocalExperts

@@ -29,7 +29,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
+func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",

@@ -67,12 +67,12 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 		})
 	}

-	return append(out, p.llama.Tensors(ts)...)
+	return append(out, p.llamaModel.Tensors(ts)...)
 }

-func (p *mixtral) Replacements() []string {
+func (p *mixtralModel) Replacements() []string {
 	return append(
-		p.llama.Replacements(),
+		p.llamaModel.Replacements(),
 		"block_sparse_moe.gate", "ffn_gate_inp",
 	)
 }
convert/convert_phi3.go

@@ -11,8 +11,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )

-type phi3 struct {
-	Parameters
+type phi3Model struct {
+	ModelParameters
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayers         uint32 `json:"n_layers"`
 	HiddenSize      uint32 `json:"hidden_size"`

@@ -35,10 +35,10 @@ type phi3 struct {
 	SlidingWindow uint32 `json:"sliding_window"`
 }

-var _ Converter = (*phi3)(nil)
+var _ ModelConverter = (*phi3Model)(nil)

-func (p *phi3) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *phi3Model) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
 	kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)

@@ -68,7 +68,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
 	return kv
 }

-func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
+func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
 	var addRopeFactors sync.Once

 	out := make([]llm.Tensor, 0, len(ts)+2)

@@ -100,7 +100,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }

-func (p *phi3) Replacements() []string {
+func (p *phi3Model) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
convert/convert_test.go

 package convert

 import (
+	"bytes"
 	"crypto/sha256"
+	"encoding/binary"
 	"encoding/hex"
 	"encoding/json"
 	"flag"

@@ -29,7 +31,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	}
 	defer f.Close()

-	if err := Convert(fsys, f); err != nil {
+	if err := ConvertModel(fsys, f); err != nil {
 		t.Fatal(err)
 	}

@@ -51,6 +53,34 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	return r, m.KV(), m.Tensors()
 }

+func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
+	actual := make(map[string]string)
+	for k, v := range kv {
+		if s, ok := v.(json.Marshaler); !ok {
+			actual[k] = fmt.Sprintf("%v", v)
+		} else {
+			bts, err := json.Marshal(s)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
+		}
+	}
+
+	for _, tensor := range tensors.Items {
+		sha256sum := sha256.New()
+		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
+		if _, err := io.Copy(sha256sum, sr); err != nil {
+			t.Fatal(err)
+		}
+
+		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
+	}
+
+	return actual
+}
+
 func TestMain(m *testing.M) {
 	var level slog.Level
 	flag.TextVar(&level, "level", slog.LevelInfo, "log level")

@@ -85,49 +115,233 @@ func TestConvertFull(t *testing.T) {
 	}

 	f, kv, tensors := convertFull(t, os.DirFS(p))
-	actual := make(map[string]string)
-	for k, v := range kv {
-		if s, ok := v.(json.Marshaler); !ok {
-			actual[k] = fmt.Sprintf("%v", v)
-		} else {
-			bts, err := json.Marshal(s)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
-		}
-	}
-
-	for _, tensor := range tensors.Items {
-		sha256sum := sha256.New()
-		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
-		if _, err := io.Copy(sha256sum, sr); err != nil {
-			t.Fatal(err)
-		}
-
-		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
-	}
+	actual := generateResultsJSON(t, f, kv, tensors)

 	expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
 	if err != nil {
 		t.Fatal(err)
 	}

 	var expect map[string]string
 	if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
 		t.Fatal(err)
 	}

 	keys := maps.Keys(expect)
 	slices.Sort(keys)
 	for _, k := range keys {
 		if v, ok := actual[k]; !ok {
 			t.Errorf("missing %s", k)
 		} else if v != expect[k] {
 			t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
 		}
 	}

The same hunk adds two new functions, shown here without diff markers:

func TestConvertAdapter(t *testing.T) {
	type AdapterCase struct {
		Name     string
		BaseKV   map[string]any
		Expected map[string]string
	}

	cases := []AdapterCase{
		{
			Name: "discollama",
			BaseKV: map[string]any{
				"general.architecture":          "llama",
				"llama.attention.head_count":    uint32(32),
				"llama.attention.head_count_kv": uint32(8),
			},
			Expected: map[string]string{
				"general.architecture":          "llama",
				"general.file_type":             "1",
				"general.parameter_count":       "106496",
				"general.type":                  "adapter",
				"general.version":               "v0.2",
				"adapter.lora.alpha":            "16",
				"adapter.type":                  "lora",
				"llama.attention.head_count":    "32",
				"llama.attention.head_count_kv": "8",
				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
			},
		},
	}

	for _, c := range cases {
		t.Run(c.Name, func(t *testing.T) {
			t.Parallel()

			f, err := os.CreateTemp(t.TempDir(), "f16")
			if err != nil {
				t.Fatal(err)
			}
			defer f.Close()

			tempDir := t.TempDir()
			generateLoraTestData(t, tempDir)

			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
				t.Fatal(err)
			}

			r, err := os.Open(f.Name())
			if err != nil {
				t.Fatal(err)
			}
			defer r.Close()

			m, _, err := llm.DecodeGGML(r, math.MaxInt)
			if err != nil {
				t.Fatal(err)
			}

			if _, err := r.Seek(0, io.SeekStart); err != nil {
				t.Fatal(err)
			}

			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())

			keys := maps.Keys(c.Expected)
			slices.Sort(keys)
			for _, k := range keys {
				if v, ok := actual[k]; !ok {
					t.Errorf("missing %s", k)
				} else if v != c.Expected[k] {
					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
				}
			}
		})
	}
}

func generateLoraTestData(t *testing.T, tempDir string) {
	type tensorData struct {
		Offsets []int  `json:"data_offsets"`
		Type    string `json:"dtype"`
		Shape   []int  `json:"shape"`
	}
	offset := 4096 * 8 * 4

	td := map[string]*tensorData{"__metadata__": nil}
	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
		Offsets: []int{0, offset},
		Type:    "F32",
		Shape:   []int{4096, 8},
	}
	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
		Offsets: []int{offset, offset * 2},
		Type:    "F32",
		Shape:   []int{8, 4096},
	}
	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
		Offsets: []int{offset * 2, offset * 3},
		Type:    "F32",
		Shape:   []int{4096, 8},
	}
	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
		Type:    "F32",
		Shape:   []int{8, 1024},
	}

	data, err := json.Marshal(td)
	if err != nil {
		t.Fatal(err)
	}

	var buf bytes.Buffer

	l := int64(len(data))
	err = binary.Write(&buf, binary.LittleEndian, l)
	if err != nil {
		t.Fatal(err)
	}

	_, err = buf.Write(data)
	if err != nil {
		t.Fatal(err)
	}

	// write some data for the tensors
	ones := make([]float32, 4096*8)
	for i := range ones {
		ones[i] = float32(1)
	}

	for range 3 {
		err = binary.Write(&buf, binary.LittleEndian, ones)
		if err != nil {
			t.Fatal(err)
		}
	}

	ones = make([]float32, 1024*8)
	for i := range ones {
		ones[i] = float32(1)
	}

	err = binary.Write(&buf, binary.LittleEndian, ones)
	if err != nil {
		t.Fatal(err)
	}

	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
	if err != nil {
		t.Fatal(err)
	}
	defer fdata.Close()

	_, err = fdata.Write(buf.Bytes())
	if err != nil {
		t.Fatal(err)
	}

	configData := `
{
    "adapter_path": "adapters-test",
    "batch_size": 8,
    "config": "config-tiny.json",
    "data": "../discollama-completion",
    "grad_checkpoint": null,
    "iters": 1000,
    "learning_rate": 1e-05,
    "lora_layers": 1,
    "lora_parameters": {
        "rank": 8,
        "alpha": 16,
        "dropout": 0.0,
        "scale": 2.0
    },
    "lr_schedule": null,
    "max_seq_length": 2048,
    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
    "resume_adapter_file": null,
    "save_every": 100,
    "seed": 0,
    "steps_per_eval": 200,
    "steps_per_report": 10,
    "test": false,
    "test_batches": 500,
    "train": true,
    "use_dora": false,
    "val_batches": 25
}
`

	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	_, err = f.WriteString(configData)
	if err != nil {
		t.Fatal(err)
	}
}
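generateLoraTestData writes a safetensors file by hand: an 8-byte little-endian header length, a JSON header mapping tensor names to dtype/shape/byte offsets, then the raw tensor data. A minimal standalone sketch of that layout — the file name and tensor are placeholders:

package main

import (
	"bytes"
	"encoding/binary"
	"encoding/json"
	"os"
)

func main() {
	// One 2×2 F32 tensor of zeros: 16 bytes of data at offsets [0, 16).
	header := map[string]any{
		"example.weight": map[string]any{
			"dtype":        "F32",
			"shape":        []int{2, 2},
			"data_offsets": []int{0, 16},
		},
	}

	hdr, err := json.Marshal(header)
	if err != nil {
		panic(err)
	}

	var buf bytes.Buffer
	// safetensors layout: uint64 little-endian header length, JSON header, raw data.
	binary.Write(&buf, binary.LittleEndian, uint64(len(hdr)))
	buf.Write(hdr)
	buf.Write(make([]byte, 16)) // four float32 zeros

	if err := os.WriteFile("example.safetensors", buf.Bytes(), 0o644); err != nil {
		panic(err)
	}
}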
convert/reader.go

@@ -64,6 +64,8 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},
+		{"adapters.safetensors", parseSafetensors},
+		{"adapter_model.safetensors", parseSafetensors},
 		{"pytorch_model-*-of-*.bin", parseTorch},
 		{"pytorch_model.bin", parseTorch},
 		{"consolidated.*.pth", parseTorch},
llm/ggml.go

@@ -43,6 +43,14 @@ func (kv KV) Architecture() string {
 	return "unknown"
 }

+func (kv KV) Kind() string {
+	if s, ok := kv["general.type"].(string); ok {
+		return s
+	}
+
+	return "unknown"
+}
+
 func (kv KV) ParameterCount() uint64 {
 	return kv.u64("general.parameter_count")
 }
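KV.Kind lets callers distinguish an adapter GGUF (general.type = "adapter") from a model; server/model.go below uses it to pick the layer media type. A minimal sketch of checking a converted file, assuming the post-commit llm package API as exercised by the tests above; the path is a placeholder:

package main

import (
	"fmt"
	"log"
	"math"
	"os"

	"github.com/ollama/ollama/llm"
)

func main() {
	// Any GGUF produced by ConvertAdapter would do here.
	f, err := os.Open("adapter.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	m, _, err := llm.DecodeGGML(f, math.MaxInt)
	if err != nil {
		log.Fatal(err)
	}

	// Kind() reads general.type, falling back to "unknown".
	if m.KV().Kind() == "adapter" {
		fmt.Println("this GGUF holds a LoRA adapter")
	}
}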
server/images.go

@@ -369,13 +369,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 	parameters := make(map[string]any)

 	var layers []Layer
+	var baseLayers []*layerGGML
 	for _, c := range modelfile.Commands {
 		mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
+		command := c.Name

-		switch c.Name {
+		switch command {
 		case "model", "adapter":
-			var baseLayers []*layerGGML
-			if name := model.ParseName(c.Args); name.IsValid() {
+			if name := model.ParseName(c.Args); name.IsValid() && command == "model" {
 				baseLayers, err = parseFromModel(ctx, name, fn)
 				if err != nil {
 					return err

@@ -409,14 +410,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 			}
 			defer blob.Close()

-			baseLayers, err = parseFromFile(ctx, blob, digest, fn)
+			baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn)
 			if err != nil {
 				return err
 			}
 		} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
 			defer file.Close()

-			baseLayers, err = parseFromFile(ctx, file, "", fn)
+			baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn)
 			if err != nil {
 				return err
 			}
server/model.go

@@ -81,7 +81,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 	return layers, nil
 }

-func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	fi, err := f.Stat()
 	if err != nil {
 		return nil, err

@@ -108,16 +108,38 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
 	defer t.Close()
 	defer os.Remove(t.Name())

 	fn(api.ProgressResponse{Status: "converting model"})
-	if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
-		return nil, err
+
+	var layerType string
+
+	switch command {
+	case "adapter":
+		var baseModel *llm.GGML
+		for _, l := range baseLayers {
+			if l.GGML != nil {
+				baseModel = l.GGML
+				break
+			}
+		}
+
+		if baseModel == nil {
+			return nil, fmt.Errorf("no base model specified for the adapter")
+		}
+
+		if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
+			return nil, err
+		}
+		layerType = "application/vnd.ollama.image.adapter"
+	case "model":
+		if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
+			return nil, err
+		}
+		layerType = "application/vnd.ollama.image.model"
 	}

 	if _, err := t.Seek(0, io.SeekStart); err != nil {
 		return nil, err
 	}

-	layer, err := NewLayer(t, "application/vnd.ollama.image.model")
+	layer, err := NewLayer(t, layerType)
 	if err != nil {
 		return nil, err
 	}

@@ -139,7 +161,7 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
 	return detectChatTemplate(layers)
 }

-func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	sr := io.NewSectionReader(file, 0, 512)
 	contentType, err := detectContentType(sr)
 	if err != nil {

@@ -150,7 +172,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 	case "gguf", "ggla":
 		// noop
 	case "application/zip":
-		return parseFromZipFile(ctx, file, digest, fn)
+		return parseFromZipFile(ctx, command, baseLayers, file, digest, fn)
 	default:
 		return nil, fmt.Errorf("unsupported content type: %s", contentType)
 	}

@@ -170,7 +192,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 	}

 	mediatype := "application/vnd.ollama.image.model"
-	if ggml.Name() == "ggla" {
+	if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" {
 		mediatype = "application/vnd.ollama.image.adapter"
 	} else if ggml.KV().Architecture() == "clip" {
 		mediatype = "application/vnd.ollama.image.projector"
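The media type decision in parseFromFile now has three signals: legacy ggla adapters, GGUFs whose general.type is "adapter", and CLIP projectors. A standalone sketch of that precedence — the function and its inputs are illustrative, not ollama's API:

package main

import "fmt"

// mediaTypeFor mirrors the precedence used when labelling a parsed layer:
// legacy ggla files and GGUF adapters map to the adapter media type, CLIP
// models to the projector type, and everything else to the model type.
func mediaTypeFor(containerName, kind, arch string) string {
	switch {
	case containerName == "ggla" || kind == "adapter":
		return "application/vnd.ollama.image.adapter"
	case arch == "clip":
		return "application/vnd.ollama.image.projector"
	default:
		return "application/vnd.ollama.image.model"
	}
}

func main() {
	fmt.Println(mediaTypeFor("gguf", "adapter", "llama")) // ...image.adapter
	fmt.Println(mediaTypeFor("gguf", "unknown", "clip"))  // ...image.projector
	fmt.Println(mediaTypeFor("gguf", "unknown", "llama")) // ...image.model
}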
server/model_test.go

@@ -153,7 +153,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}

-	layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
+	layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}

@@ -166,7 +166,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}

-	layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
+	layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}

@@ -206,7 +206,7 @@ func TestParseLayerFromCopy(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}

-	layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
+	layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}