OpenDAS / ollama · Commits

Commit 0c819e16 (unverified), authored Aug 23, 2024 by Patrick Devine, committed by GitHub on Aug 23, 2024

convert safetensor adapters into GGUF (#6327)
parent 7a1e1c1c

Showing 16 changed files with 697 additions and 101 deletions (+697 −101)
cmd/cmd.go  +6 −0
convert/convert.go  +97 −14
convert/convert_bert.go  +9 −9
convert/convert_gemma.go  +9 −9
convert/convert_gemma2.go  +6 −6
convert/convert_gemma2_adapter.go  +91 −0
convert/convert_llama.go  +8 −8
convert/convert_llama_adapter.go  +169 −0
convert/convert_mixtral.go  +8 −8
convert/convert_phi3.go  +7 −7
convert/convert_test.go  +238 −24
convert/reader.go  +2 −0
llm/ggml.go  +8 −0
server/images.go  +6 −5
server/model.go  +30 −8
server/model_test.go  +3 −3
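Taken together, the commit splits the converter into two entry points: ConvertModel for full safetensors checkpoints and ConvertAdapter for LoRA adapters, where the adapter path additionally needs the base model's GGUF key-values to resolve the architecture. A minimal calling sketch; the paths and the baseKV variable here are illustrative, not from the commit:

    // Sketch: convert a safetensors LoRA adapter directory to GGUF.
    // baseKV (llm.KV) is assumed to hold the base model's decoded metadata.
    out, err := os.Create("adapter.gguf")
    if err != nil {
        log.Fatal(err)
    }
    defer out.Close()

    if err := convert.ConvertAdapter(os.DirFS("./lora-adapter"), out, baseKV); err != nil {
        log.Fatal(err)
    }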
cmd/cmd.go

@@ -204,6 +204,12 @@ func tempZipFiles(path string) (string, error) {
 		// safetensors files might be unresolved git lfs references; skip if they are
 		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
 		files = append(files, st...)
+	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// covers adapters.safetensors
+		files = append(files, st...)
+	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// covers adapter_model.safetensors
+		files = append(files, st...)
 	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
convert/convert.go

@@ -12,12 +12,22 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type Parameters struct {
+type ModelParameters struct {
 	Architectures []string `json:"architectures"`
 	VocabSize     uint32   `json:"vocab_size"`
 }
 
-func (Parameters) KV(t *Tokenizer) llm.KV {
+type AdapterParameters struct {
+	Alpha          uint32 `json:"lora_alpha"`
+	LoraLayers     uint32 `json:"lora_layers"`
+	LoraParameters struct {
+		Rank  uint32  `json:"rank"`
+		Alpha float32 `json:"alpha"`
+		Scale float32 `json:"scale"`
+	} `json:"lora_parameters"`
+}
+
+func (ModelParameters) KV(t *Tokenizer) llm.KV {
 	kv := llm.KV{
 		"general.file_type":            uint32(1),
 		"general.quantization_version": uint32(2),

@@ -44,17 +54,40 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (Parameters) specialTokenTypes() []string {
+func (p AdapterParameters) KV() llm.KV {
+	var alpha float32
+	if p.LoraParameters.Alpha == 0 {
+		alpha = float32(p.Alpha)
+	} else {
+		alpha = p.LoraParameters.Alpha
+	}
+
+	kv := llm.KV{
+		"adapter.lora.alpha": alpha,
+		"adapter.type":       "lora",
+		"general.file_type":  uint32(1),
+		"general.type":       "adapter",
+		"general.version":    "v0.2",
+	}
+
+	return kv
+}
+
+func (ModelParameters) specialTokenTypes() []string {
 	return []string{
 		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
 	}
 }
 
-func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+	return llm.WriteGGUF(ws, kv, ts)
+}
+
+func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }
 
-type Converter interface {
+type ModelConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.

@@ -73,17 +106,67 @@ type moreParser interface {
 	parseMore(fs.FS) error
 }
 
+type AdapterConverter interface {
+	// KV maps parameters to LLM key-values
+	KV(llm.KV) llm.KV
+	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
+	Tensors([]Tensor) []llm.Tensor
+	// Replacements returns a list of string pairs to replace in tensor names.
+	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
+	Replacements() []string
+
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+}
+
+func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
+	bts, err := fs.ReadFile(fsys, "adapter_config.json")
+	if err != nil {
+		return err
+	}
+
+	var p AdapterParameters
+	if err := json.Unmarshal(bts, &p); err != nil {
+		return err
+	}
+
+	arch, ok := baseKV["general.architecture"]
+	if !ok {
+		return errors.New("architecture not set for the base model")
+	}
+
+	var conv AdapterConverter
+	switch arch {
+	case "llama":
+		conv = &llamaAdapter{}
+	case "gemma2":
+		conv = &gemma2Adapter{}
+	default:
+		return errors.New("unsupported architecture")
+	}
+
+	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
+	if err != nil {
+		return err
+	}
+
+	if err := json.Unmarshal(bts, conv); err != nil {
+		return err
+	}
+
+	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+}
+
 // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
 // and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func Convert(fsys fs.FS, ws io.WriteSeeker) error {
+func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
 		return err
 	}
 
-	var p Parameters
+	var p ModelParameters
 	if err := json.Unmarshal(bts, &p); err != nil {
 		return err
 	}

@@ -92,20 +175,20 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		return errors.New("unknown architecture")
 	}
 
-	var conv Converter
+	var conv ModelConverter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
-		conv = &llama{}
+		conv = &llamaModel{}
 	case "MixtralForCausalLM":
-		conv = &mixtral{}
+		conv = &mixtralModel{}
 	case "GemmaForCausalLM":
-		conv = &gemma{}
+		conv = &gemmaModel{}
 	case "Gemma2ForCausalLM":
-		conv = &gemma2{}
+		conv = &gemma2Model{}
 	case "Phi3ForCausalLM":
-		conv = &phi3{}
+		conv = &phi3Model{}
 	case "BertModel":
-		conv = &bert{}
+		conv = &bertModel{}
 	default:
 		return errors.New("unsupported architecture")
 	}
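A note on the alpha handling in AdapterParameters.KV: it accepts both config dialects. A PEFT-style adapter_config.json carries an integer lora_alpha at the top level, while an MLX-style config nests a float alpha under lora_parameters; the nested value wins whenever it is non-zero. An illustrative sketch (values chosen to match the test data added later in this commit):

    var peft AdapterParameters
    peft.Alpha = 16 // from "lora_alpha": 16

    var mlx AdapterParameters
    mlx.LoraParameters.Alpha = 16 // from "lora_parameters": {"alpha": 16}

    // Both yield adapter.lora.alpha == float32(16).
    fmt.Println(peft.KV()["adapter.lora.alpha"], mlx.KV()["adapter.lora.alpha"])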
convert/convert_bert.go

@@ -11,8 +11,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type bert struct {
-	Parameters
+type bertModel struct {
+	ModelParameters
 	NLayers         uint32 `json:"n_layers"`
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayer          uint32 `json:"n_layer"`

@@ -33,11 +33,11 @@ type bert struct {
 }
 
 var (
-	_ Converter  = (*bert)(nil)
-	_ moreParser = (*bert)(nil)
+	_ ModelConverter = (*bertModel)(nil)
+	_ moreParser     = (*bertModel)(nil)
 )
 
-func (p *bert) parseMore(fsys fs.FS) error {
+func (p *bertModel) parseMore(fsys fs.FS) error {
 	bts, err := fs.ReadFile(fsys, "modules.json")
 	if err != nil {
 		return err

@@ -85,8 +85,8 @@ func (p *bert) parseMore(fsys fs.FS) error {
 	return nil
 }
 
-func (p *bert) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *bertModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
 	kv["bert.pooling_type"] = p.PoolingType

@@ -132,7 +132,7 @@ func (p *bert) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
+func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
 		if slices.Contains([]string{

@@ -154,7 +154,7 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (bert) Replacements() []string {
+func (bertModel) Replacements() []string {
 	return []string{
 		"encoder.layer", "blk",
 		"encoder.layers", "blk",
convert/convert_gemma.go

@@ -9,8 +9,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type gemma struct {
-	Parameters
+type gemmaModel struct {
+	ModelParameters
 	MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
 	HiddenSize            uint32 `json:"hidden_size"`
 	HiddenLayers          uint32 `json:"num_hidden_layers"`

@@ -21,10 +21,10 @@ type gemma struct {
 	HeadDim uint32 `json:"head_dim"`
 }
 
-var _ Converter = (*gemma)(nil)
+var _ ModelConverter = (*gemmaModel)(nil)
 
-func (p *gemma) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma"
 	kv["gemma.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma.embedding_length"] = p.HiddenSize

@@ -42,8 +42,8 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
-	out := make([]llm.Tensor, 0, len(ts))
+func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		if strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)

@@ -60,7 +60,7 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *gemma) Replacements() []string {
+func (p *gemmaModel) Replacements() []string {
 	return []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",

@@ -77,7 +77,7 @@ func (p *gemma) Replacements() []string {
 	}
 }
 
-func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
+func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
 	ones := tensor.Ones(tensor.Float32, int(shape[0]))
convert/convert_gemma2.go

@@ -4,15 +4,15 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type gemma2 struct {
-	gemma
+type gemma2Model struct {
+	gemmaModel
 	SlidingWindow         uint32  `json:"sliding_window"`
 	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
 	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
 }
 
-func (p *gemma2) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma2"
 	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma2.embedding_length"] = p.HiddenSize

@@ -33,9 +33,9 @@ func (p *gemma2) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *gemma2) Replacements() []string {
+func (p *gemma2Model) Replacements() []string {
 	return append(
-		p.gemma.Replacements(),
+		p.gemmaModel.Replacements(),
 		"post_attention_layernorm", "post_attention_norm",
 		"pre_feedforward_layernorm", "ffn_norm",
 		"post_feedforward_layernorm", "post_ffw_norm",
convert/convert_gemma2_adapter.go (new file, 0 → 100644)

package convert

import (
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type gemma2Adapter struct {
	AdapterParameters
}

var _ AdapterConverter = (*gemma2Adapter)(nil)

func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "gemma2"
	return kv
}

func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
			shape[0], shape[1] = shape[1], shape[0]
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

func (p *gemma2Adapter) Replacements() []string {
	return []string{
		"base_model.model.", "",
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"lora_A.weight", "weight.lora_a",
		"lora_B.weight", "weight.lora_b",
		"lora_a", "weight.lora_a",
		"lora_b", "weight.lora_b",
	}
}

func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	if err := n.T(1, 0); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
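Both new adapter converters share the same layout check in Tensors: a lora_a tensor whose first dimension exceeds its second, or a lora_b with the opposite relation, is assumed to arrive in the transposed training layout, so its dimensions are swapped and a repacker is attached to transpose the data at write time. A worked instance of the predicate, using the q_proj shapes from the commit's test data:

    // An MLX-trained q_proj.lora_a arrives as (4096, 8); 4096 > 8, so the
    // dims are swapped to (8, 4096) and repack transposes the values to match.
    shape := []uint64{4096, 8}
    if shape[0] > shape[1] { // the "weight.lora_a" suffix check is elided here
        shape[0], shape[1] = shape[1], shape[0]
    }
    fmt.Println(shape) // [8 4096]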
convert/convert_llama.go

@@ -12,8 +12,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type llama struct {
-	Parameters
+type llamaModel struct {
+	ModelParameters
 	NLayers         uint32 `json:"n_layers"`
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayer          uint32 `json:"n_layer"`

@@ -44,10 +44,10 @@ type llama struct {
 	HeadDim uint32 `json:"head_dim"`
 }
 
-var _ Converter = (*llama)(nil)
+var _ ModelConverter = (*llamaModel)(nil)
 
-func (p *llama) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *llamaModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize

@@ -120,7 +120,7 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
+func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 
 	if p.RopeScaling.factors != nil {

@@ -149,7 +149,7 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *llama) Replacements() []string {
+func (p *llamaModel) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",

@@ -167,7 +167,7 @@ func (p *llama) Replacements() []string {
 	}
 }
 
-func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
+func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		dims = append(dims, int(dim))
convert/convert_llama_adapter.go (new file, 0 → 100644)

package convert

import (
	"cmp"
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type llamaAdapter struct {
	AdapterParameters
	NumAttentionHeads uint32 `json:"num_attention_heads"`
	NumKeyValueHeads  uint32 `json:"num_key_value_heads"`
}

var _ AdapterConverter = (*llamaAdapter)(nil)

func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "llama"
	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]

	p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)

	return kv
}

func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
			shape[0], shape[1] = shape[1], shape[0]
			t.SetRepacker(p.repackAndTranspose)
		} else {
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    shape,
			WriterTo: t,
		})
	}

	return out
}

func (p *llamaAdapter) Replacements() []string {
	return []string{
		"base_model.model.", "",
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"lora_A.weight", "weight.lora_a",
		"lora_B.weight", "weight.lora_b",
		"lora_a", "weight.lora_a",
		"lora_b", "weight.lora_b",
	}
}

func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	var heads uint32
	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
		heads = p.NumAttentionHeads
	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
	} else {
		return data, nil
	}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
		return nil, err
	}

	if err := n.T(0, 2, 1, 3); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}

func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	var heads uint32
	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
		heads = p.NumAttentionHeads
	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
	}

	if heads > 0 {
		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
			return nil, err
		}

		if err := n.T(0, 2, 1, 3); err != nil {
			return nil, err
		}

		if err := n.Reshape(dims...); err != nil {
			return nil, err
		}

		if err := n.Transpose(); err != nil {
			return nil, err
		}
	}

	if err := n.T(1, 0); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
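Beyond the transpose, the llama adapter repacks attn_q and attn_k lora_a tensors with the same interleaved-rotary head permutation the base llama converter applies to full Q/K weights, using head counts copied out of the base model's KV above. The cmp.Or call covers grouped-query models: when num_key_value_heads is absent (zero), it falls back to the full attention head count. A quick sketch of that fallback:

    // cmp.Or (Go 1.22+) returns its first non-zero argument.
    fmt.Println(cmp.Or(uint32(8), uint32(32))) // 8: GQA model with 8 KV heads
    fmt.Println(cmp.Or(uint32(0), uint32(32))) // 32: no KV-head count, fall back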
convert/convert_mixtral.go

@@ -9,14 +9,14 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type mixtral struct {
-	llama
+type mixtralModel struct {
+	llamaModel
 	NumLocalExperts    uint32 `json:"num_local_experts"`
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 
-func (p *mixtral) KV(t *Tokenizer) llm.KV {
-	kv := p.llama.KV(t)
+func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
+	kv := p.llamaModel.KV(t)
 
 	if p.NumLocalExperts > 0 {
 		kv["llama.expert_count"] = p.NumLocalExperts

@@ -29,7 +29,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
+func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",

@@ -67,12 +67,12 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 		})
 	}
 
-	return append(out, p.llama.Tensors(ts)...)
+	return append(out, p.llamaModel.Tensors(ts)...)
 }
 
-func (p *mixtral) Replacements() []string {
+func (p *mixtralModel) Replacements() []string {
 	return append(
-		p.llama.Replacements(),
+		p.llamaModel.Replacements(),
 		"block_sparse_moe.gate", "ffn_gate_inp",
 	)
 }
convert/convert_phi3.go

@@ -11,8 +11,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type phi3 struct {
-	Parameters
+type phi3Model struct {
+	ModelParameters
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayers         uint32 `json:"n_layers"`
 	HiddenSize      uint32 `json:"hidden_size"`

@@ -35,10 +35,10 @@ type phi3 struct {
 	SlidingWindow uint32 `json:"sliding_window"`
 }
 
-var _ Converter = (*phi3)(nil)
+var _ ModelConverter = (*phi3Model)(nil)
 
-func (p *phi3) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *phi3Model) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
 	kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)

@@ -68,7 +68,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
+func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
 	var addRopeFactors sync.Once
 
 	out := make([]llm.Tensor, 0, len(ts)+2)

@@ -100,7 +100,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *phi3) Replacements() []string {
+func (p *phi3Model) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
convert/convert_test.go

package convert

import (
	"bytes"
	"crypto/sha256"
	"encoding/binary"
	"encoding/hex"
	"encoding/json"
	"flag"
	...

@@ -29,7 +31,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	}
 	defer f.Close()
 
-	if err := Convert(fsys, f); err != nil {
+	if err := ConvertModel(fsys, f); err != nil {
 		t.Fatal(err)
 	}

@@ -51,6 +53,34 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	return r, m.KV(), m.Tensors()
 }
 
+func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
+	actual := make(map[string]string)
+	for k, v := range kv {
+		if s, ok := v.(json.Marshaler); !ok {
+			actual[k] = fmt.Sprintf("%v", v)
+		} else {
+			bts, err := json.Marshal(s)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
+		}
+	}
+
+	for _, tensor := range tensors.Items {
+		sha256sum := sha256.New()
+		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
+		if _, err := io.Copy(sha256sum, sr); err != nil {
+			t.Fatal(err)
+		}
+
+		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
+	}
+
+	return actual
+}
+
 func TestMain(m *testing.M) {
 	var level slog.Level
 	flag.TextVar(&level, "level", slog.LevelInfo, "log level")

@@ -85,29 +115,7 @@ func TestConvertFull(t *testing.T) {
 	}
 
 	f, kv, tensors := convertFull(t, os.DirFS(p))
-	actual := make(map[string]string)
-	for k, v := range kv {
-		if s, ok := v.(json.Marshaler); !ok {
-			actual[k] = fmt.Sprintf("%v", v)
-		} else {
-			bts, err := json.Marshal(s)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
-		}
-	}
-
-	for _, tensor := range tensors.Items {
-		sha256sum := sha256.New()
-		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
-		if _, err := io.Copy(sha256sum, sr); err != nil {
-			t.Fatal(err)
-		}
-
-		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
-	}
+	actual := generateResultsJSON(t, f, kv, tensors)
 
 	expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
 	if err != nil {

@@ -131,3 +139,209 @@ func TestConvertFull(t *testing.T) {
 		})
 	}
 }
+
+func TestConvertAdapter(t *testing.T) {
+	type AdapterCase struct {
+		Name     string
+		BaseKV   map[string]any
+		Expected map[string]string
+	}
+
+	cases := []AdapterCase{
+		{
+			Name: "discollama",
+			BaseKV: map[string]any{
+				"general.architecture":          "llama",
+				"llama.attention.head_count":    uint32(32),
+				"llama.attention.head_count_kv": uint32(8),
+			},
+			Expected: map[string]string{
+				"general.architecture":          "llama",
+				"general.file_type":             "1",
+				"general.parameter_count":       "106496",
+				"general.type":                  "adapter",
+				"general.version":               "v0.2",
+				"adapter.lora.alpha":            "16",
+				"adapter.type":                  "lora",
+				"llama.attention.head_count":    "32",
+				"llama.attention.head_count_kv": "8",
+				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
+				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
+				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
+				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
+			},
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.Name, func(t *testing.T) {
+			t.Parallel()
+
+			f, err := os.CreateTemp(t.TempDir(), "f16")
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer f.Close()
+
+			tempDir := t.TempDir()
+			generateLoraTestData(t, tempDir)
+
+			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
+				t.Fatal(err)
+			}
+
+			r, err := os.Open(f.Name())
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer r.Close()
+
+			m, _, err := llm.DecodeGGML(r, math.MaxInt)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if _, err := r.Seek(0, io.SeekStart); err != nil {
+				t.Fatal(err)
+			}
+
+			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
+
+			keys := maps.Keys(c.Expected)
+			slices.Sort(keys)
+			for _, k := range keys {
+				if v, ok := actual[k]; !ok {
+					t.Errorf("missing %s", k)
+				} else if v != c.Expected[k] {
+					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
+				}
+			}
+		})
+	}
+}
+
+func generateLoraTestData(t *testing.T, tempDir string) {
+	type tensorData struct {
+		Offsets []int  `json:"data_offsets"`
+		Type    string `json:"dtype"`
+		Shape   []int  `json:"shape"`
+	}
+
+	offset := 4096 * 8 * 4
+
+	td := map[string]*tensorData{"__metadata__": nil}
+	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
+		Offsets: []int{0, offset},
+		Type:    "F32",
+		Shape:   []int{4096, 8},
+	}
+	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
+		Offsets: []int{offset, offset * 2},
+		Type:    "F32",
+		Shape:   []int{8, 4096},
+	}
+	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
+		Offsets: []int{offset * 2, offset * 3},
+		Type:    "F32",
+		Shape:   []int{4096, 8},
+	}
+	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
+		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
+		Type:    "F32",
+		Shape:   []int{8, 1024},
+	}
+
+	data, err := json.Marshal(td)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var buf bytes.Buffer
+
+	l := int64(len(data))
+	err = binary.Write(&buf, binary.LittleEndian, l)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = buf.Write(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// write some data for the tensors
+	ones := make([]float32, 4096*8)
+	for i := range ones {
+		ones[i] = float32(1)
+	}
+
+	for range 3 {
+		err = binary.Write(&buf, binary.LittleEndian, ones)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	ones = make([]float32, 1024*8)
+	for i := range ones {
+		ones[i] = float32(1)
+	}
+
+	err = binary.Write(&buf, binary.LittleEndian, ones)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer fdata.Close()
+
+	_, err = fdata.Write(buf.Bytes())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	configData := `
+{
+  "adapter_path": "adapters-test",
+  "batch_size": 8,
+  "config": "config-tiny.json",
+  "data": "../discollama-completion",
+  "grad_checkpoint": null,
+  "iters": 1000,
+  "learning_rate": 1e-05,
+  "lora_layers": 1,
+  "lora_parameters": {
+    "rank": 8,
+    "alpha": 16,
+    "dropout": 0.0,
+    "scale": 2.0
+  },
+  "lr_schedule": null,
+  "max_seq_length": 2048,
+  "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
+  "resume_adapter_file": null,
+  "save_every": 100,
+  "seed": 0,
+  "steps_per_eval": 200,
+  "steps_per_report": 10,
+  "test": false,
+  "test_batches": 500,
+  "train": true,
+  "use_dora": false,
+  "val_batches": 25
+}
+`
+
+	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	_, err = f.WriteString(configData)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
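generateLoraTestData writes the adapter weights in the safetensors layout by hand: an 8-byte little-endian header length, then a JSON header mapping tensor names to dtype, shape, and data offsets, then the raw tensor bytes. A minimal sketch that reads such a header back, assuming the same layout (not part of the commit):

    f, err := os.Open(filepath.Join(tempDir, "adapters.safetensors"))
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    var n uint64
    if err := binary.Read(f, binary.LittleEndian, &n); err != nil { // header length
        t.Fatal(err)
    }

    header := make([]byte, n) // JSON index: {"name": {"dtype": ..., "shape": ..., "data_offsets": ...}}
    if _, err := io.ReadFull(f, header); err != nil {
        t.Fatal(err)
    }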
convert/reader.go

@@ -64,6 +64,8 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},
+		{"adapters.safetensors", parseSafetensors},
+		{"adapter_model.safetensors", parseSafetensors},
 		{"pytorch_model-*-of-*.bin", parseTorch},
 		{"pytorch_model.bin", parseTorch},
 		{"consolidated.*.pth", parseTorch},
llm/ggml.go

@@ -43,6 +43,14 @@ func (kv KV) Architecture() string {
 	return "unknown"
 }
 
+func (kv KV) Kind() string {
+	if s, ok := kv["general.type"].(string); ok {
+		return s
+	}
+
+	return "unknown"
+}
+
 func (kv KV) ParameterCount() uint64 {
 	return kv.u64("general.parameter_count")
 }
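Kind surfaces the new general.type key so callers can tell adapter GGUFs from model GGUFs; server/model.go below uses it to pick the adapter mediatype. A quick illustration:

    fmt.Println(llm.KV{"general.type": "adapter"}.Kind()) // adapter
    fmt.Println(llm.KV{}.Kind())                          // unknown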
server/images.go

@@ -369,13 +369,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 	parameters := make(map[string]any)
 
 	var layers []Layer
+	var baseLayers []*layerGGML
 	for _, c := range modelfile.Commands {
 		mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
+		command := c.Name
 
-		switch c.Name {
+		switch command {
 		case "model", "adapter":
-			var baseLayers []*layerGGML
-			if name := model.ParseName(c.Args); name.IsValid() {
+			if name := model.ParseName(c.Args); name.IsValid() && command == "model" {
 				baseLayers, err = parseFromModel(ctx, name, fn)
 				if err != nil {
 					return err

@@ -409,14 +410,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 			}
 			defer blob.Close()
 
-			baseLayers, err = parseFromFile(ctx, blob, digest, fn)
+			baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn)
 			if err != nil {
 				return err
 			}
 		} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
 			defer file.Close()
 
-			baseLayers, err = parseFromFile(ctx, file, "", fn)
+			baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn)
 			if err != nil {
 				return err
 			}
server/model.go

@@ -81,7 +81,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 	return layers, nil
 }
 
-func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	fi, err := f.Stat()
 	if err != nil {
 		return nil, err

@@ -108,16 +108,38 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
 	defer t.Close()
 	defer os.Remove(t.Name())
 
 	fn(api.ProgressResponse{Status: "converting model"})
-	if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
-		return nil, err
+
+	var layerType string
+
+	switch command {
+	case "adapter":
+		var baseModel *llm.GGML
+		for _, l := range baseLayers {
+			if l.GGML != nil {
+				baseModel = l.GGML
+				break
+			}
+		}
+
+		if baseModel == nil {
+			return nil, fmt.Errorf("no base model specified for the adapter")
+		}
+
+		if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
+			return nil, err
+		}
+		layerType = "application/vnd.ollama.image.adapter"
+	case "model":
+		if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
+			return nil, err
+		}
+		layerType = "application/vnd.ollama.image.model"
 	}
 
 	if _, err := t.Seek(0, io.SeekStart); err != nil {
 		return nil, err
 	}
 
-	layer, err := NewLayer(t, "application/vnd.ollama.image.model")
+	layer, err := NewLayer(t, layerType)
 	if err != nil {
 		return nil, err
 	}

@@ -139,7 +161,7 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
 	return detectChatTemplate(layers)
 }
 
-func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	sr := io.NewSectionReader(file, 0, 512)
 	contentType, err := detectContentType(sr)
 	if err != nil {

@@ -150,7 +172,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 	case "gguf", "ggla":
 		// noop
 	case "application/zip":
-		return parseFromZipFile(ctx, file, digest, fn)
+		return parseFromZipFile(ctx, command, baseLayers, file, digest, fn)
 	default:
 		return nil, fmt.Errorf("unsupported content type: %s", contentType)
 	}

@@ -170,7 +192,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 	}
 
 	mediatype := "application/vnd.ollama.image.model"
-	if ggml.Name() == "ggla" {
+	if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" {
 		mediatype = "application/vnd.ollama.image.adapter"
 	} else if ggml.KV().Architecture() == "clip" {
 		mediatype = "application/vnd.ollama.image.projector"
server/model_test.go

@@ -153,7 +153,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}
 
-	layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
+	layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}

@@ -166,7 +166,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}
 
-	layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
+	layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}

@@ -206,7 +206,7 @@ func TestParseLayerFromCopy(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}
 
-	layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
+	layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}