Unverified commit 0c819e16, authored Aug 23, 2024 by Patrick Devine and committed by GitHub on Aug 23, 2024
Parent: 7a1e1c1c

convert safetensor adapters into GGUF (#6327)
Showing 16 changed files with 697 additions and 101 deletions:

cmd/cmd.go                         +6    -0
convert/convert.go                 +97   -14
convert/convert_bert.go            +9    -9
convert/convert_gemma.go           +9    -9
convert/convert_gemma2.go          +6    -6
convert/convert_gemma2_adapter.go  +91   -0   (new)
convert/convert_llama.go           +8    -8
convert/convert_llama_adapter.go   +169  -0   (new)
convert/convert_mixtral.go         +8    -8
convert/convert_phi3.go            +7    -7
convert/convert_test.go            +238  -24
convert/reader.go                  +2    -0
llm/ggml.go                        +8    -0
server/images.go                   +6    -5
server/model.go                    +30   -8
server/model_test.go               +3    -3
cmd/cmd.go

@@ -204,6 +204,12 @@ func tempZipFiles(path string) (string, error) {
 		// safetensors files might be unresolved git lfs references; skip if they are
 		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
 		files = append(files, st...)
+	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// covers adapters.safetensors
+		files = append(files, st...)
+	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
+		// covers adapter_model.safetensors
+		files = append(files, st...)
 	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
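
The new branches reuse cmd.go's unexported glob helper, which pairs a filename pattern with an expected content type so that unresolved git-lfs pointer files (which are plain text, not binary tensor data) are skipped. Below is a rough, self-contained sketch of that idea; the helper is hypothetical and only illustrates the pattern-plus-content-type filter, using net/http's sniffing in place of whatever detection the repo does internally:

package main

import (
	"fmt"
	"net/http"
	"os"
	"path/filepath"
)

// globByContentType is a hypothetical stand-in for cmd.go's glob helper:
// match a pattern, then keep only files whose first 512 bytes sniff as
// the expected content type. An lfs pointer file sniffs as
// "text/plain; charset=utf-8", a real safetensors payload as
// "application/octet-stream", so pointers get filtered out.
func globByContentType(pattern, contentType string) ([]string, error) {
	matches, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}

	var files []string
	for _, m := range matches {
		f, err := os.Open(m)
		if err != nil {
			return nil, err
		}
		b := make([]byte, 512)
		n, _ := f.Read(b)
		f.Close()

		if http.DetectContentType(b[:n]) == contentType {
			files = append(files, m)
		}
	}
	return files, nil
}

func main() {
	files, err := globByContentType(filepath.Join(".", "adapters.safetensors"), "application/octet-stream")
	if err != nil {
		panic(err)
	}
	fmt.Println(files)
}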
convert/convert.go

@@ -12,12 +12,22 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type Parameters struct {
+type ModelParameters struct {
 	Architectures []string `json:"architectures"`
 	VocabSize     uint32   `json:"vocab_size"`
 }
 
-func (Parameters) KV(t *Tokenizer) llm.KV {
+type AdapterParameters struct {
+	Alpha          uint32 `json:"lora_alpha"`
+	LoraLayers     uint32 `json:"lora_layers"`
+	LoraParameters struct {
+		Rank  uint32  `json:"rank"`
+		Alpha float32 `json:"alpha"`
+		Scale float32 `json:"scale"`
+	} `json:"lora_parameters"`
+}
+
+func (ModelParameters) KV(t *Tokenizer) llm.KV {
 	kv := llm.KV{
 		"general.file_type":            uint32(1),
 		"general.quantization_version": uint32(2),

@@ -44,17 +54,40 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (Parameters) specialTokenTypes() []string {
+func (p AdapterParameters) KV() llm.KV {
+	var alpha float32
+	if p.LoraParameters.Alpha == 0 {
+		alpha = float32(p.Alpha)
+	} else {
+		alpha = p.LoraParameters.Alpha
+	}
+
+	kv := llm.KV{
+		"adapter.lora.alpha": alpha,
+		"adapter.type":       "lora",
+		"general.file_type":  uint32(1),
+		"general.type":       "adapter",
+		"general.version":    "v0.2",
+	}
+
+	return kv
+}
+
+func (ModelParameters) specialTokenTypes() []string {
 	return []string{
 		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
 	}
 }
 
-func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }
 
-type Converter interface {
+func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
+	return llm.WriteGGUF(ws, kv, ts)
+}
+
+type ModelConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.

@@ -73,17 +106,67 @@ type moreParser interface {
 	parseMore(fs.FS) error
 }
 
+type AdapterConverter interface {
+	// KV maps parameters to LLM key-values
+	KV(llm.KV) llm.KV
+	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
+	Tensors([]Tensor) []llm.Tensor
+	// Replacements returns a list of string pairs to replace in tensor names.
+	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
+	Replacements() []string
+
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+}
+
+func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
+	bts, err := fs.ReadFile(fsys, "adapter_config.json")
+	if err != nil {
+		return err
+	}
+
+	var p AdapterParameters
+	if err := json.Unmarshal(bts, &p); err != nil {
+		return err
+	}
+
+	arch, ok := baseKV["general.architecture"]
+	if !ok {
+		return errors.New("architecture not set for the base model")
+	}
+
+	var conv AdapterConverter
+	switch arch {
+	case "llama":
+		conv = &llamaAdapter{}
+	case "gemma2":
+		conv = &gemma2Adapter{}
+	default:
+		return errors.New("unsupported architecture")
+	}
+
+	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
+	if err != nil {
+		return err
+	}
+
+	if err := json.Unmarshal(bts, conv); err != nil {
+		return err
+	}
+
+	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+}
+
 // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
 // and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func Convert(fsys fs.FS, ws io.WriteSeeker) error {
+func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
 		return err
 	}
 
-	var p Parameters
+	var p ModelParameters
 	if err := json.Unmarshal(bts, &p); err != nil {
 		return err
 	}

@@ -92,20 +175,20 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
 		return errors.New("unknown architecture")
 	}
 
-	var conv Converter
+	var conv ModelConverter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
-		conv = &llama{}
+		conv = &llamaModel{}
 	case "MixtralForCausalLM":
-		conv = &mixtral{}
+		conv = &mixtralModel{}
 	case "GemmaForCausalLM":
-		conv = &gemma{}
+		conv = &gemmaModel{}
 	case "Gemma2ForCausalLM":
-		conv = &gemma2{}
+		conv = &gemma2Model{}
 	case "Phi3ForCausalLM":
-		conv = &phi3{}
+		conv = &phi3Model{}
 	case "BertModel":
-		conv = &bert{}
+		conv = &bertModel{}
 	default:
 		return errors.New("unsupported architecture")
 	}
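
With these changes the package exposes two entry points: ConvertModel for full safetensors checkpoints and ConvertAdapter for LoRA adapters, where the adapter path needs the base model's key/value metadata to select a converter and carry over attention-head counts. Here is a minimal sketch of driving ConvertAdapter directly; the baseKV literal mirrors this commit's test fixture, while in the server it comes from decoding the base model's GGUF:

package main

import (
	"log"
	"os"

	"github.com/ollama/ollama/convert"
	"github.com/ollama/ollama/llm"
)

func main() {
	out, err := os.Create("adapter.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	// ConvertAdapter refuses to run without general.architecture, and
	// the llama adapter copies the head counts into its own KV.
	baseKV := llm.KV{
		"general.architecture":          "llama",
		"llama.attention.head_count":    uint32(32),
		"llama.attention.head_count_kv": uint32(8),
	}

	// ./lora must hold adapter_config.json plus adapters.safetensors
	// or adapter_model.safetensors (see convert/reader.go below).
	if err := convert.ConvertAdapter(os.DirFS("./lora"), out, baseKV); err != nil {
		log.Fatal(err)
	}
}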
convert/convert_bert.go

@@ -11,8 +11,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type bert struct {
-	Parameters
+type bertModel struct {
+	ModelParameters
 	NLayers         uint32 `json:"n_layers"`
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayer          uint32 `json:"n_layer"`

@@ -33,11 +33,11 @@ type bert struct {
 }
 
 var (
-	_ Converter  = (*bert)(nil)
-	_ moreParser = (*bert)(nil)
+	_ ModelConverter = (*bertModel)(nil)
+	_ moreParser     = (*bertModel)(nil)
 )
 
-func (p *bert) parseMore(fsys fs.FS) error {
+func (p *bertModel) parseMore(fsys fs.FS) error {
 	bts, err := fs.ReadFile(fsys, "modules.json")
 	if err != nil {
 		return err

@@ -85,8 +85,8 @@ func (p *bert) parseMore(fsys fs.FS) error {
 	return nil
 }
 
-func (p *bert) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *bertModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
 	kv["bert.pooling_type"] = p.PoolingType

@@ -132,7 +132,7 @@ func (p *bert) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
+func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 	for _, t := range ts {
 		if slices.Contains([]string{

@@ -154,7 +154,7 @@ func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (bert) Replacements() []string {
+func (bertModel) Replacements() []string {
 	return []string{
 		"encoder.layer", "blk",
 		"encoder.layers", "blk",
convert/convert_gemma.go

@@ -9,8 +9,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type gemma struct {
-	Parameters
+type gemmaModel struct {
+	ModelParameters
 	MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
 	HiddenSize            uint32 `json:"hidden_size"`
 	HiddenLayers          uint32 `json:"num_hidden_layers"`

@@ -21,10 +21,10 @@ type gemma struct {
 	HeadDim uint32 `json:"head_dim"`
 }
 
-var _ Converter = (*gemma)(nil)
+var _ ModelConverter = (*gemmaModel)(nil)
 
-func (p *gemma) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma"
 	kv["gemma.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma.embedding_length"] = p.HiddenSize

@@ -42,8 +42,8 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
-	out := make([]llm.Tensor, 0, len(ts))
+func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		if strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)

@@ -60,7 +60,7 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *gemma) Replacements() []string {
+func (p *gemmaModel) Replacements() []string {
 	return []string{
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",

@@ -77,7 +77,7 @@ func (p *gemma) Replacements() []string {
 	}
 }
 
-func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
+func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
 	ones := tensor.Ones(tensor.Float32, int(shape[0]))
convert/convert_gemma2.go

@@ -4,15 +4,15 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type gemma2 struct {
-	gemma
+type gemma2Model struct {
+	gemmaModel
 	SlidingWindow         uint32  `json:"sliding_window"`
 	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
 	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
 }
 
-func (p *gemma2) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma2"
 	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma2.embedding_length"] = p.HiddenSize

@@ -33,9 +33,9 @@ func (p *gemma2) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *gemma2) Replacements() []string {
+func (p *gemma2Model) Replacements() []string {
 	return append(
-		p.gemma.Replacements(),
+		p.gemmaModel.Replacements(),
 		"post_attention_layernorm", "post_attention_norm",
 		"pre_feedforward_layernorm", "ffn_norm",
 		"post_feedforward_layernorm", "post_ffw_norm",
convert/convert_gemma2_adapter.go (new file, 0 → 100644)

package convert

import (
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type gemma2Adapter struct {
	AdapterParameters
}

var _ AdapterConverter = (*gemma2Adapter)(nil)

func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "gemma2"
	return kv
}

func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
			shape[0], shape[1] = shape[1], shape[0]
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

func (p *gemma2Adapter) Replacements() []string {
	return []string{
		"base_model.model.", "",
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"lora_A.weight", "weight.lora_a",
		"lora_B.weight", "weight.lora_b",
		"lora_a", "weight.lora_a",
		"lora_b", "weight.lora_b",
	}
}

func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	if err := n.T(1, 0); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
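
The shape test in Tensors normalizes LoRA orientation: any lora_a or lora_b tensor that arrives in the opposite orientation from what the GGUF layout expects has its recorded shape swapped, and repack is registered to rewrite the underlying data to match. As a plain-Go illustration of the matrix transpose this amounts to (independent of the tensor library the converter actually uses):

// transpose rewrites a rows x cols row-major matrix as the cols x rows
// row-major matrix, roughly the data movement repack performs via
// tensor.T / Reshape / Transpose before re-flattening with SelectF32.
func transpose(data []float32, rows, cols int) []float32 {
	out := make([]float32, len(data))
	for r := 0; r < rows; r++ {
		for c := 0; c < cols; c++ {
			out[c*rows+r] = data[r*cols+c]
		}
	}
	return out
}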
convert/convert_llama.go

@@ -12,8 +12,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type llama struct {
-	Parameters
+type llamaModel struct {
+	ModelParameters
 	NLayers         uint32 `json:"n_layers"`
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayer          uint32 `json:"n_layer"`

@@ -44,10 +44,10 @@ type llama struct {
 	HeadDim uint32 `json:"head_dim"`
 }
 
-var _ Converter = (*llama)(nil)
+var _ ModelConverter = (*llamaModel)(nil)
 
-func (p *llama) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *llamaModel) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize

@@ -120,7 +120,7 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
+func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
 
 	if p.RopeScaling.factors != nil {

@@ -149,7 +149,7 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *llama) Replacements() []string {
+func (p *llamaModel) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",

@@ -167,7 +167,7 @@ func (p *llama) Replacements() []string {
 	}
 }
 
-func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
+func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
 	var dims []int
 	for _, dim := range shape {
 		dims = append(dims, int(dim))
convert/convert_llama_adapter.go (new file, 0 → 100644)

package convert

import (
	"cmp"
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

type llamaAdapter struct {
	AdapterParameters
	NumAttentionHeads uint32 `json:"num_attention_heads"`
	NumKeyValueHeads  uint32 `json:"num_key_value_heads"`
}

var _ AdapterConverter = (*llamaAdapter)(nil)

func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
	kv := p.AdapterParameters.KV()
	kv["general.architecture"] = "llama"
	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]

	p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)

	return kv
}

func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
	var out []llm.Tensor
	for _, t := range ts {
		shape := t.Shape()
		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
			shape[0], shape[1] = shape[1], shape[0]
			t.SetRepacker(p.repackAndTranspose)
		} else {
			t.SetRepacker(p.repack)
		}

		out = append(out, llm.Tensor{
			Name:     t.Name(),
			Kind:     t.Kind(),
			Shape:    shape,
			WriterTo: t,
		})
	}

	return out
}

func (p *llamaAdapter) Replacements() []string {
	return []string{
		"base_model.model.", "",
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"self_attn.k_proj", "attn_k",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"lora_A.weight", "weight.lora_a",
		"lora_B.weight", "weight.lora_b",
		"lora_a", "weight.lora_a",
		"lora_b", "weight.lora_b",
	}
}

func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	var heads uint32
	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
		heads = p.NumAttentionHeads
	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
	} else {
		return data, nil
	}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
		return nil, err
	}

	if err := n.T(0, 2, 1, 3); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}

func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
	dims := []int{int(shape[1]), int(shape[0])}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))

	var heads uint32
	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
		heads = p.NumAttentionHeads
	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
	}

	if heads > 0 {
		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
			return nil, err
		}

		if err := n.T(0, 2, 1, 3); err != nil {
			return nil, err
		}

		if err := n.Reshape(dims...); err != nil {
			return nil, err
		}

		if err := n.Transpose(); err != nil {
			return nil, err
		}
	}

	if err := n.T(1, 0); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}
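
Unlike the gemma2 adapter, the llama adapter must also permute attn_q and attn_k lora_a matrices with the same interleaved-heads layout the base llama converter applies to full Q/K weights (the reshape to heads x 2 x dim/heads/2 followed by swapping the middle axes); otherwise the low-rank update would no longer line up with the permuted base weight rows. The cmp.Or call is the fallback for checkpoints that don't define separate key/value heads; a tiny sketch of that behavior:

package main

import (
	"cmp"
	"fmt"
)

func main() {
	var numKVHeads uint32 = 0 // zero when absent from the config
	var numHeads uint32 = 32

	// cmp.Or (Go 1.22+) returns its first non-zero argument, so a
	// missing KV-head count falls back to the attention head count.
	heads := cmp.Or(numKVHeads, numHeads)
	fmt.Println(heads) // prints 32
}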
convert/convert_mixtral.go

@@ -9,14 +9,14 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type mixtral struct {
-	llama
+type mixtralModel struct {
+	llamaModel
 	NumLocalExperts    uint32 `json:"num_local_experts"`
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 
-func (p *mixtral) KV(t *Tokenizer) llm.KV {
-	kv := p.llama.KV(t)
+func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
+	kv := p.llamaModel.KV(t)
 
 	if p.NumLocalExperts > 0 {
 		kv["llama.expert_count"] = p.NumLocalExperts

@@ -29,7 +29,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
+func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",

@@ -67,12 +67,12 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 		})
 	}
 
-	return append(out, p.llama.Tensors(ts)...)
+	return append(out, p.llamaModel.Tensors(ts)...)
 }
 
-func (p *mixtral) Replacements() []string {
+func (p *mixtralModel) Replacements() []string {
 	return append(
-		p.llama.Replacements(),
+		p.llamaModel.Replacements(),
 		"block_sparse_moe.gate", "ffn_gate_inp",
 	)
 }
convert/convert_phi3.go

@@ -11,8 +11,8 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
-type phi3 struct {
-	Parameters
+type phi3Model struct {
+	ModelParameters
 	NumHiddenLayers uint32 `json:"num_hidden_layers"`
 	NLayers         uint32 `json:"n_layers"`
 	HiddenSize      uint32 `json:"hidden_size"`

@@ -35,10 +35,10 @@ type phi3 struct {
 	SlidingWindow uint32 `json:"sliding_window"`
 }
 
-var _ Converter = (*phi3)(nil)
+var _ ModelConverter = (*phi3Model)(nil)
 
-func (p *phi3) KV(t *Tokenizer) llm.KV {
-	kv := p.Parameters.KV(t)
+func (p *phi3Model) KV(t *Tokenizer) llm.KV {
+	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
 	kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)

@@ -68,7 +68,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
+func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
 	var addRopeFactors sync.Once
 
 	out := make([]llm.Tensor, 0, len(ts)+2)

@@ -100,7 +100,7 @@ func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
 	return out
 }
 
-func (p *phi3) Replacements() []string {
+func (p *phi3Model) Replacements() []string {
 	return []string{
 		"lm_head", "output",
 		"model.embed_tokens", "token_embd",
convert/convert_test.go

 package convert
 
 import (
+	"bytes"
 	"crypto/sha256"
+	"encoding/binary"
 	"encoding/hex"
 	"encoding/json"
 	"flag"

@@ -29,7 +31,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	}
 	defer f.Close()
 
-	if err := Convert(fsys, f); err != nil {
+	if err := ConvertModel(fsys, f); err != nil {
 		t.Fatal(err)
 	}

@@ -51,6 +53,34 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	return r, m.KV(), m.Tensors()
 }
 
+func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
+	actual := make(map[string]string)
+	for k, v := range kv {
+		if s, ok := v.(json.Marshaler); !ok {
+			actual[k] = fmt.Sprintf("%v", v)
+		} else {
+			bts, err := json.Marshal(s)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
+		}
+	}
+
+	for _, tensor := range tensors.Items {
+		sha256sum := sha256.New()
+		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
+		if _, err := io.Copy(sha256sum, sr); err != nil {
+			t.Fatal(err)
+		}
+
+		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
+	}
+
+	return actual
+}
+
 func TestMain(m *testing.M) {
 	var level slog.Level
 	flag.TextVar(&level, "level", slog.LevelInfo, "log level")

@@ -85,49 +115,233 @@ func TestConvertFull(t *testing.T) {
 			}
 
 			f, kv, tensors := convertFull(t, os.DirFS(p))
-			actual := make(map[string]string)
-			for k, v := range kv {
-				if s, ok := v.(json.Marshaler); !ok {
-					actual[k] = fmt.Sprintf("%v", v)
-				} else {
-					bts, err := json.Marshal(s)
-					if err != nil {
-						t.Fatal(err)
-					}
-
-					actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
-				}
-			}
-
-			for _, tensor := range tensors.Items {
-				sha256sum := sha256.New()
-				sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
-				if _, err := io.Copy(sha256sum, sr); err != nil {
-					t.Fatal(err)
-				}
-
-				actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
-			}
+			actual := generateResultsJSON(t, f, kv, tensors)
 
 			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
 			if err != nil {
 				t.Fatal(err)
 			}
 
 			var expect map[string]string
 			if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
 				t.Fatal(err)
 			}
 
 			keys := maps.Keys(expect)
 			slices.Sort(keys)
 			for _, k := range keys {
 				if v, ok := actual[k]; !ok {
 					t.Errorf("missing %s", k)
 				} else if v != expect[k] {
 					t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
 				}
 			}
 		})
 	}
 }
+
+func TestConvertAdapter(t *testing.T) {
+	type AdapterCase struct {
+		Name     string
+		BaseKV   map[string]any
+		Expected map[string]string
+	}
+
+	cases := []AdapterCase{
+		{
+			Name: "discollama",
+			BaseKV: map[string]any{
+				"general.architecture":          "llama",
+				"llama.attention.head_count":    uint32(32),
+				"llama.attention.head_count_kv": uint32(8),
+			},
+			Expected: map[string]string{
+				"general.architecture":          "llama",
+				"general.file_type":             "1",
+				"general.parameter_count":       "106496",
+				"general.type":                  "adapter",
+				"general.version":               "v0.2",
+				"adapter.lora.alpha":            "16",
+				"adapter.type":                  "lora",
+				"llama.attention.head_count":    "32",
+				"llama.attention.head_count_kv": "8",
+				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
+				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
+				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
+				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
+			},
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.Name, func(t *testing.T) {
+			t.Parallel()
+
+			f, err := os.CreateTemp(t.TempDir(), "f16")
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer f.Close()
+
+			tempDir := t.TempDir()
+			generateLoraTestData(t, tempDir)
+
+			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
+				t.Fatal(err)
+			}
+
+			r, err := os.Open(f.Name())
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer r.Close()
+
+			m, _, err := llm.DecodeGGML(r, math.MaxInt)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if _, err := r.Seek(0, io.SeekStart); err != nil {
+				t.Fatal(err)
+			}
+
+			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
+
+			keys := maps.Keys(c.Expected)
+			slices.Sort(keys)
+			for _, k := range keys {
+				if v, ok := actual[k]; !ok {
+					t.Errorf("missing %s", k)
+				} else if v != c.Expected[k] {
+					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
+				}
+			}
+		})
+	}
+}
+
+func generateLoraTestData(t *testing.T, tempDir string) {
+	type tensorData struct {
+		Offsets []int  `json:"data_offsets"`
+		Type    string `json:"dtype"`
+		Shape   []int  `json:"shape"`
+	}
+	offset := 4096 * 8 * 4
+
+	td := map[string]*tensorData{"__metadata__": nil}
+	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
+		Offsets: []int{0, offset},
+		Type:    "F32",
+		Shape:   []int{4096, 8},
+	}
+	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
+		Offsets: []int{offset, offset * 2},
+		Type:    "F32",
+		Shape:   []int{8, 4096},
+	}
+	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
+		Offsets: []int{offset * 2, offset * 3},
+		Type:    "F32",
+		Shape:   []int{4096, 8},
+	}
+	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
+		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
+		Type:    "F32",
+		Shape:   []int{8, 1024},
+	}
+
+	data, err := json.Marshal(td)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var buf bytes.Buffer
+
+	l := int64(len(data))
+	err = binary.Write(&buf, binary.LittleEndian, l)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = buf.Write(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// write some data for the tensors
+	ones := make([]float32, 4096*8)
+	for i := range ones {
+		ones[i] = float32(1)
+	}
+
+	for range 3 {
+		err = binary.Write(&buf, binary.LittleEndian, ones)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	ones = make([]float32, 1024*8)
+	for i := range ones {
+		ones[i] = float32(1)
+	}
+
+	err = binary.Write(&buf, binary.LittleEndian, ones)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer fdata.Close()
+	_, err = fdata.Write(buf.Bytes())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	configData := `
+{
+    "adapter_path": "adapters-test",
+    "batch_size": 8,
+    "config": "config-tiny.json",
+    "data": "../discollama-completion",
+    "grad_checkpoint": null,
+    "iters": 1000,
+    "learning_rate": 1e-05,
+    "lora_layers": 1,
+    "lora_parameters": {
+        "rank": 8,
+        "alpha": 16,
+        "dropout": 0.0,
+        "scale": 2.0
+    },
+    "lr_schedule": null,
+    "max_seq_length": 2048,
+    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
+    "resume_adapter_file": null,
+    "save_every": 100,
+    "seed": 0,
+    "steps_per_eval": 200,
+    "steps_per_report": 10,
+    "test": false,
+    "test_batches": 500,
+    "train": true,
+    "use_dora": false,
+    "val_batches": 25
+}
+`
+
+	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	_, err = f.WriteString(configData)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
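
generateLoraTestData writes a valid safetensors file by hand, which doubles as a compact description of the format: an 8-byte little-endian header length, a JSON header mapping tensor names to dtype, shape, and data_offsets (byte ranges into the blob that follows), then the raw tensor bytes. Here is a small sketch of reading such a header back; this is not the converter's parser, just the format in miniature:

package main

import (
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"os"
)

func main() {
	f, err := os.Open("adapters.safetensors")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// 8-byte little-endian length of the JSON header.
	var n uint64
	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
		log.Fatal(err)
	}

	// Header entries look like:
	// {"data_offsets":[0,131072],"dtype":"F32","shape":[4096,8]}
	var header map[string]json.RawMessage
	if err := json.NewDecoder(io.LimitReader(f, int64(n))).Decode(&header); err != nil {
		log.Fatal(err)
	}

	for name, meta := range header {
		fmt.Printf("%s: %s\n", name, meta)
	}
}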
convert/reader.go

@@ -64,6 +64,8 @@ func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
 	}{
 		{"model-*-of-*.safetensors", parseSafetensors},
 		{"model.safetensors", parseSafetensors},
+		{"adapters.safetensors", parseSafetensors},
+		{"adapter_model.safetensors", parseSafetensors},
 		{"pytorch_model-*-of-*.bin", parseTorch},
 		{"pytorch_model.bin", parseTorch},
 		{"consolidated.*.pth", parseTorch},
llm/ggml.go

@@ -43,6 +43,14 @@ func (kv KV) Architecture() string {
 	return "unknown"
 }
 
+func (kv KV) Kind() string {
+	if s, ok := kv["general.type"].(string); ok {
+		return s
+	}
+
+	return "unknown"
+}
+
 func (kv KV) ParameterCount() uint64 {
 	return kv.u64("general.parameter_count")
 }
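
Kind reads the general.type key that AdapterParameters.KV now writes ("adapter"), giving the server a way to tell an adapter GGUF from a full model without guessing from tensor names; parseFromFile below uses it to pick the layer media type. A short sketch of checking a converted file, using the same DecodeGGML call as the new test:

package main

import (
	"fmt"
	"log"
	"math"
	"os"

	"github.com/ollama/ollama/llm"
)

func main() {
	f, err := os.Open("adapter.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	ggml, _, err := llm.DecodeGGML(f, math.MaxInt)
	if err != nil {
		log.Fatal(err)
	}

	// "adapter" for files written by ConvertAdapter; "unknown" for
	// ordinary models, which don't set general.type.
	fmt.Println(ggml.KV().Kind())
}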
server/images.go

@@ -369,13 +369,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 	parameters := make(map[string]any)
 
 	var layers []Layer
+	var baseLayers []*layerGGML
 	for _, c := range modelfile.Commands {
 		mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
+		command := c.Name
 
-		switch c.Name {
+		switch command {
 		case "model", "adapter":
-			var baseLayers []*layerGGML
-			if name := model.ParseName(c.Args); name.IsValid() {
+			if name := model.ParseName(c.Args); name.IsValid() && command == "model" {
 				baseLayers, err = parseFromModel(ctx, name, fn)
 				if err != nil {
 					return err

@@ -409,14 +410,14 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 				}
 				defer blob.Close()
 
-				baseLayers, err = parseFromFile(ctx, blob, digest, fn)
+				baseLayers, err = parseFromFile(ctx, command, baseLayers, blob, digest, fn)
 				if err != nil {
 					return err
 				}
 			} else if file, err := os.Open(realpath(modelFileDir, c.Args)); err == nil {
 				defer file.Close()
 
-				baseLayers, err = parseFromFile(ctx, file, "", fn)
+				baseLayers, err = parseFromFile(ctx, command, baseLayers, file, "", fn)
 				if err != nil {
 					return err
 				}
server/model.go

@@ -81,7 +81,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 	return layers, nil
 }
 
-func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	fi, err := f.Stat()
 	if err != nil {
 		return nil, err

@@ -108,16 +108,38 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
 	defer t.Close()
 	defer os.Remove(t.Name())
 
 	fn(api.ProgressResponse{Status: "converting model"})
-	if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
-		return nil, err
-	}
+
+	var layerType string
+
+	switch command {
+	case "adapter":
+		var baseModel *llm.GGML
+		for _, l := range baseLayers {
+			if l.GGML != nil {
+				baseModel = l.GGML
+				break
+			}
+		}
+
+		if baseModel == nil {
+			return nil, fmt.Errorf("no base model specified for the adapter")
+		}
+
+		if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
+			return nil, err
+		}
+		layerType = "application/vnd.ollama.image.adapter"
+	case "model":
+		if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
+			return nil, err
+		}
+		layerType = "application/vnd.ollama.image.model"
+	}
 
 	if _, err := t.Seek(0, io.SeekStart); err != nil {
 		return nil, err
 	}
 
-	layer, err := NewLayer(t, "application/vnd.ollama.image.model")
+	layer, err := NewLayer(t, layerType)
 	if err != nil {
 		return nil, err
 	}

@@ -139,7 +161,7 @@ func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.
 	return detectChatTemplate(layers)
 }
 
-func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
+func parseFromFile(ctx context.Context, command string, baseLayers []*layerGGML, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
 	sr := io.NewSectionReader(file, 0, 512)
 	contentType, err := detectContentType(sr)
 	if err != nil {

@@ -150,7 +172,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 	case "gguf", "ggla":
 		// noop
 	case "application/zip":
-		return parseFromZipFile(ctx, file, digest, fn)
+		return parseFromZipFile(ctx, command, baseLayers, file, digest, fn)
 	default:
 		return nil, fmt.Errorf("unsupported content type: %s", contentType)
 	}

@@ -170,7 +192,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 	}
 
 	mediatype := "application/vnd.ollama.image.model"
-	if ggml.Name() == "ggla" {
+	if ggml.Name() == "ggla" || ggml.KV().Kind() == "adapter" {
 		mediatype = "application/vnd.ollama.image.adapter"
 	} else if ggml.KV().Architecture() == "clip" {
 		mediatype = "application/vnd.ollama.image.projector"
server/model_test.go

@@ -153,7 +153,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}
 
-	layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
+	layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, "", func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}

@@ -166,7 +166,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}
 
-	layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
+	layers2, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file, layers[0].Digest, func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}

@@ -206,7 +206,7 @@ func TestParseLayerFromCopy(t *testing.T) {
 		t.Fatalf("failed to seek to start: %v", err)
 	}
 
-	layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
+	layers, err := parseFromFile(context.Background(), "model", []*layerGGML{}, file2, "", func(api.ProgressResponse) {})
 	if err != nil {
 		t.Fatalf("failed to parse from file: %v", err)
 	}