Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
0ff42e84
Unverified
Commit
0ff42e84
authored
Aug 01, 2024
by
Michael Yang
Committed by
GitHub
Aug 01, 2024
Browse files
Merge pull request #4756 from ollama/mxyng/convert2
refactor convert
parents
3e614260
d8e2664c
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
169 additions
and
652 deletions
+169
-652
convert/torch.go
convert/torch.go
+0
-287
llm/ggla.go
llm/ggla.go
+13
-1
llm/ggml.go
llm/ggml.go
+5
-2
llm/gguf.go
llm/gguf.go
+135
-170
llm/memory_test.go
llm/memory_test.go
+1
-3
server/model.go
server/model.go
+12
-81
server/model_test.go
server/model_test.go
+0
-102
server/routes_create_test.go
server/routes_create_test.go
+1
-2
server/sched_test.go
server/sched_test.go
+2
-4
No files found.
convert/torch.go
deleted
100644 → 0
View file @
3e614260
package
convert
import
(
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
type
torchWriterTo
struct
{
t
*
llm
.
Tensor
params
*
Params
bo
ByteOrder
storage
pytorch
.
StorageInterface
repacker
func
(
string
,
[]
float32
,
[]
uint64
)
([]
float32
,
error
)
}
// TorchFormat is the model-format implementation for PyTorch checkpoint
// directories. It carries no state of its own.
type TorchFormat struct{}
func
(
tf
*
TorchFormat
)
GetTensors
(
dirpath
string
,
params
*
Params
)
([]
llm
.
Tensor
,
error
)
{
slog
.
Debug
(
"getting torch tensors"
)
var
files
[]
string
if
pt
,
_
:=
filepath
.
Glob
(
filepath
.
Join
(
dirpath
,
"consolidated*.pth"
));
len
(
pt
)
>
0
{
files
=
append
(
files
,
pt
...
)
}
else
if
pt
,
_
:=
filepath
.
Glob
(
filepath
.
Join
(
dirpath
,
"pytorch_model*.pth"
));
len
(
pt
)
>
0
{
files
=
append
(
files
,
pt
...
)
}
var
offset
uint64
var
tensors
[]
llm
.
Tensor
for
_
,
fn
:=
range
files
{
m
,
err
:=
pytorch
.
Load
(
fn
)
if
err
!=
nil
{
slog
.
Error
(
fmt
.
Sprintf
(
"error unpickling: %q"
,
err
))
return
[]
llm
.
Tensor
{},
err
}
for
_
,
k
:=
range
m
.
(
*
types
.
Dict
)
.
Keys
()
{
if
strings
.
HasSuffix
(
k
.
(
string
),
"self_attn.rotary_emb.inv_freq"
)
{
continue
}
t
,
_
:=
m
.
(
*
types
.
Dict
)
.
Get
(
k
)
tshape
:=
t
.
(
*
pytorch
.
Tensor
)
.
Size
var
size
uint64
var
kind
uint32
switch
len
(
tshape
)
{
case
0
:
continue
case
1
:
// convert to float32
kind
=
0
size
=
uint64
(
tshape
[
0
]
*
4
)
case
2
:
// convert to float16
kind
=
1
size
=
uint64
(
tshape
[
0
]
*
tshape
[
1
]
*
2
)
}
ggufName
,
err
:=
tf
.
GetLayerName
(
k
.
(
string
))
if
err
!=
nil
{
slog
.
Error
(
err
.
Error
())
return
nil
,
err
}
slog
.
Debug
(
fmt
.
Sprintf
(
"'%35s': '%30s' %10d [%#v]"
,
k
.
(
string
),
ggufName
,
size
,
tshape
))
shape
:=
[]
uint64
{
0
,
0
,
0
,
0
}
for
i
:=
range
tshape
{
shape
[
i
]
=
uint64
(
tshape
[
i
])
}
tensor
:=
llm
.
Tensor
{
Name
:
ggufName
,
Kind
:
kind
,
Offset
:
offset
,
// calculate the offset
Shape
:
shape
,
}
tensor
.
WriterTo
=
torchWriterTo
{
t
:
&
tensor
,
params
:
params
,
bo
:
params
.
ByteOrder
,
storage
:
t
.
(
*
pytorch
.
Tensor
)
.
Source
,
}
tensors
=
append
(
tensors
,
tensor
)
offset
+=
size
}
}
return
tensors
,
nil
}
func
getAltParams
(
dirpath
string
)
(
*
Params
,
error
)
{
f
,
err
:=
os
.
Open
(
filepath
.
Join
(
dirpath
,
"params.json"
))
if
err
!=
nil
{
slog
.
Error
(
"no params.json"
)
return
nil
,
err
}
defer
f
.
Close
()
type
TorchParams
struct
{
HiddenSize
int
`json:"dim"`
AttentionHeads
int
`json:"n_heads"`
KeyValHeads
int
`json:"n_kv_heads"`
HiddenLayers
int
`json:"n_layers"`
RopeTheta
float64
`json:"rope_theta"`
NormEPS
float64
`json:"norm_eps"`
}
var
tparams
TorchParams
d
:=
json
.
NewDecoder
(
f
)
err
=
d
.
Decode
(
&
tparams
)
if
err
!=
nil
{
return
nil
,
err
}
params
:=
&
Params
{
Architectures
:
[]
string
{
"LlamaForCausalLM"
},
HiddenSize
:
tparams
.
HiddenSize
,
AttentionHeads
:
tparams
.
AttentionHeads
,
KeyValHeads
:
tparams
.
KeyValHeads
,
HiddenLayers
:
tparams
.
HiddenLayers
,
NormEPS
:
tparams
.
NormEPS
,
}
switch
{
case
tparams
.
RopeTheta
==
1000000
:
// Codellama
params
.
ContextSize
=
16384
case
tparams
.
NormEPS
==
1e-06
:
// llama2
slog
.
Debug
(
"Found llama2 - setting context size to 4096"
)
params
.
ContextSize
=
4096
default
:
params
.
ContextSize
=
2048
}
params
.
ByteOrder
=
binary
.
LittleEndian
return
params
,
nil
}
func
(
m
*
TorchFormat
)
GetParams
(
dirpath
string
)
(
*
Params
,
error
)
{
f
,
err
:=
os
.
Open
(
filepath
.
Join
(
dirpath
,
"config.json"
))
if
err
!=
nil
{
if
os
.
IsNotExist
(
err
)
{
// try params.json instead
return
getAltParams
(
dirpath
)
}
else
{
return
nil
,
err
}
}
var
params
Params
d
:=
json
.
NewDecoder
(
f
)
err
=
d
.
Decode
(
&
params
)
if
err
!=
nil
{
return
nil
,
err
}
params
.
ByteOrder
=
binary
.
LittleEndian
return
&
params
,
nil
}
func
(
m
*
TorchFormat
)
GetLayerName
(
n
string
)
(
string
,
error
)
{
directMap
:=
map
[
string
]
string
{
"tok_embeddings.weight"
:
"token_embd.weight"
,
"output.weight"
:
"output.weight"
,
"norm.weight"
:
"output_norm.weight"
,
"rope.freqs"
:
"rope_freqs.weight"
,
"model.embed_tokens.weight"
:
"token_embd.weight"
,
"lm_head.weight"
:
"output.weight"
,
"model.norm.weight"
:
"output_norm.weight"
,
}
lMap
:=
map
[
string
]
string
{
"layers.(
\\
d+).attention_norm.weight"
:
"blk.$1.attn_norm.weight"
,
"layers.(
\\
d+).attention_output_norm.weight"
:
"blk.$1.attn_norm.weight"
,
"layers.(
\\
d+).feed_forward.w2.weight"
:
"blk.$1.ffn_down.weight"
,
"layers.(
\\
d+).feed_forward.w1.weight"
:
"blk.$1.ffn_gate.weight"
,
"layers.(
\\
d+).feed_forward.w3.weight"
:
"blk.$1.ffn_up.weight"
,
"layers.(
\\
d+).ffn_norm.weight"
:
"blk.$1.ffn_norm.weight"
,
"layers.(
\\
d+).attention.wk.weight"
:
"blk.$1.attn_k.weight"
,
"layers.(
\\
d+).attention.wo.weight"
:
"blk.$1.attn_output.weight"
,
"layers.(
\\
d+).attention.wq.weight"
:
"blk.$1.attn_q.weight"
,
"layers.(
\\
d+).attention.wv.weight"
:
"blk.$1.attn_v.weight"
,
"model.layers.(
\\
d+).input_layernorm.weight"
:
"blk.$1.attn_norm.weight"
,
"model.layers.(
\\
d+).mlp.down_proj.weight"
:
"blk.$1.ffn_down.weight"
,
"model.layers.(
\\
d+).mlp.gate_proj.weight"
:
"blk.$1.ffn_gate.weight"
,
"model.layers.(
\\
d+).mlp.up_proj.weight"
:
"blk.$1.ffn_up.weight"
,
"model.layers.(
\\
d+).post_attention_layernorm.weight"
:
"blk.$1.ffn_norm.weight"
,
"model.layers.(
\\
d+).self_attn.k_proj.weight"
:
"blk.$1.attn_k.weight"
,
"model.layers.(
\\
d+).self_attn.o_proj.weight"
:
"blk.$1.attn_output.weight"
,
"model.layers.(
\\
d+).self_attn.q_proj.weight"
:
"blk.$1.attn_q.weight"
,
"model.layers.(
\\
d+).self_attn.v_proj.weight"
:
"blk.$1.attn_v.weight"
,
}
v
,
ok
:=
directMap
[
n
]
if
ok
{
return
v
,
nil
}
// quick hack to rename the layers to gguf format
for
k
,
v
:=
range
lMap
{
re
:=
regexp
.
MustCompile
(
k
)
newName
:=
re
.
ReplaceAllString
(
n
,
v
)
if
newName
!=
n
{
return
newName
,
nil
}
}
return
""
,
fmt
.
Errorf
(
"couldn't find a layer name for '%s'"
,
n
)
}
func
(
r
torchWriterTo
)
WriteTo
(
w
io
.
Writer
)
(
n
int64
,
err
error
)
{
var
f32s
[]
float32
switch
s
:=
r
.
storage
.
(
type
)
{
case
*
pytorch
.
FloatStorage
:
f32s
=
s
.
Data
case
*
pytorch
.
HalfStorage
:
f32s
=
s
.
Data
case
*
pytorch
.
BFloat16Storage
:
f32s
=
s
.
Data
default
:
return
0
,
fmt
.
Errorf
(
"unknown data type: %T"
,
s
)
}
if
r
.
repacker
!=
nil
{
f32s
,
err
=
r
.
repacker
(
r
.
t
.
Name
,
f32s
,
r
.
t
.
Shape
)
if
err
!=
nil
{
return
0
,
err
}
}
switch
r
.
t
.
Kind
{
case
0
:
return
0
,
binary
.
Write
(
w
,
r
.
bo
,
f32s
)
case
1
:
f16s
:=
make
([]
uint16
,
len
(
f32s
))
for
i
:=
range
f32s
{
f16s
[
i
]
=
float16
.
Fromfloat32
(
f32s
[
i
])
.
Bits
()
}
return
0
,
binary
.
Write
(
w
,
r
.
bo
,
f16s
)
default
:
return
0
,
fmt
.
Errorf
(
"unknown storage type: %d"
,
r
.
t
.
Kind
)
}
}
func
(
m
*
TorchFormat
)
GetModelArch
(
name
,
dirPath
string
,
params
*
Params
)
(
ModelArch
,
error
)
{
switch
len
(
params
.
Architectures
)
{
case
0
:
return
nil
,
fmt
.
Errorf
(
"No architecture specified to convert"
)
case
1
:
switch
params
.
Architectures
[
0
]
{
case
"LlamaForCausalLM"
:
return
&
LlamaModel
{
ModelData
{
Name
:
name
,
Path
:
dirPath
,
Params
:
params
,
Format
:
m
,
},
},
nil
default
:
return
nil
,
fmt
.
Errorf
(
"Models based on '%s' are not yet supported"
,
params
.
Architectures
[
0
])
}
}
return
nil
,
fmt
.
Errorf
(
"Unknown error"
)
}
llm/ggla.go
View file @
0ff42e84
...
@@ -36,6 +36,8 @@ type ggla struct {
...
@@ -36,6 +36,8 @@ type ggla struct {
kv
KV
kv
KV
tensors
[]
*
Tensor
tensors
[]
*
Tensor
tensorOffset
uint64
}
}
func
newGGLA
(
container
*
containerGGLA
)
*
ggla
{
func
newGGLA
(
container
*
containerGGLA
)
*
ggla
{
...
@@ -50,7 +52,10 @@ func (llm *ggla) KV() KV {
...
@@ -50,7 +52,10 @@ func (llm *ggla) KV() KV {
}
}
func
(
llm
*
ggla
)
Tensors
()
Tensors
{
func
(
llm
*
ggla
)
Tensors
()
Tensors
{
return
llm
.
tensors
return
Tensors
{
Items
:
llm
.
tensors
,
Offset
:
llm
.
tensorOffset
,
}
}
}
func
(
llm
*
ggla
)
decode
(
rs
io
.
ReadSeeker
)
(
retErr
error
)
{
func
(
llm
*
ggla
)
decode
(
rs
io
.
ReadSeeker
)
(
retErr
error
)
{
...
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
...
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
}
}
llm
.
kv
[
"alpha"
]
=
alpha
llm
.
kv
[
"alpha"
]
=
alpha
offset
,
err
:=
rs
.
Seek
(
0
,
io
.
SeekCurrent
)
if
err
!=
nil
{
return
err
}
llm
.
tensorOffset
=
uint64
(
offset
)
for
{
for
{
var
dims
uint32
var
dims
uint32
if
err
:=
binary
.
Read
(
rs
,
binary
.
LittleEndian
,
&
dims
);
err
!=
nil
{
if
err
:=
binary
.
Read
(
rs
,
binary
.
LittleEndian
,
&
dims
);
err
!=
nil
{
...
...
llm/ggml.go
View file @
0ff42e84
...
@@ -112,11 +112,14 @@ func (kv KV) ChatTemplate() string {
...
@@ -112,11 +112,14 @@ func (kv KV) ChatTemplate() string {
return
s
return
s
}
}
type
Tensors
[]
*
Tensor
type
Tensors
struct
{
Items
[]
*
Tensor
Offset
uint64
}
func
(
ts
Tensors
)
Layers
()
map
[
string
]
Layer
{
func
(
ts
Tensors
)
Layers
()
map
[
string
]
Layer
{
layers
:=
make
(
map
[
string
]
Layer
)
layers
:=
make
(
map
[
string
]
Layer
)
for
_
,
t
:=
range
ts
{
for
_
,
t
:=
range
ts
.
Items
{
parts
:=
strings
.
Split
(
t
.
Name
,
"."
)
parts
:=
strings
.
Split
(
t
.
Name
,
"."
)
if
parts
[
0
]
==
"blk"
{
if
parts
[
0
]
==
"blk"
{
// join first and second part, e.g. blk.%d
// join first and second part, e.g. blk.%d
...
...
llm/gguf.go
View file @
0ff42e84
...
@@ -2,11 +2,16 @@ package llm
...
@@ -2,11 +2,16 @@ package llm
import
(
import
(
"bytes"
"bytes"
"cmp"
"encoding/binary"
"encoding/binary"
"encoding/json"
"encoding/json"
"fmt"
"fmt"
"io"
"io"
"log/slog"
"slices"
"strings"
"strings"
"golang.org/x/exp/maps"
)
)
type
containerGGUF
struct
{
type
containerGGUF
struct
{
...
@@ -88,7 +93,8 @@ type gguf struct {
...
@@ -88,7 +93,8 @@ type gguf struct {
kv
KV
kv
KV
tensors
[]
*
Tensor
tensors
[]
*
Tensor
parameters
uint64
parameters
uint64
tensorOffset
uint64
scratch
[
16
<<
10
]
byte
scratch
[
16
<<
10
]
byte
}
}
...
@@ -100,16 +106,15 @@ func newGGUF(container *containerGGUF) *gguf {
...
@@ -100,16 +106,15 @@ func newGGUF(container *containerGGUF) *gguf {
}
}
}
}
func
NewGGUFV3
(
bo
binary
.
ByteOrder
)
*
gguf
{
return
newGGUF
(
&
containerGGUF
{
ByteOrder
:
bo
,
Version
:
3
})
}
func
(
llm
*
gguf
)
KV
()
KV
{
func
(
llm
*
gguf
)
KV
()
KV
{
return
llm
.
kv
return
llm
.
kv
}
}
func
(
llm
*
gguf
)
Tensors
()
Tensors
{
func
(
llm
*
gguf
)
Tensors
()
Tensors
{
return
llm
.
tensors
return
Tensors
{
Items
:
llm
.
tensors
,
Offset
:
llm
.
tensorOffset
,
}
}
}
func
(
llm
*
gguf
)
numTensor
()
uint64
{
func
(
llm
*
gguf
)
numTensor
()
uint64
{
...
@@ -199,7 +204,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
...
@@ -199,7 +204,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
return
fmt
.
Errorf
(
"failed to read tensor dimensions: %w"
,
err
)
return
fmt
.
Errorf
(
"failed to read tensor dimensions: %w"
,
err
)
}
}
shape
:=
[
4
]
uint64
{
1
,
1
,
1
,
1
}
shape
:=
make
(
[]
uint64
,
dims
)
for
i
:=
0
;
uint32
(
i
)
<
dims
;
i
++
{
for
i
:=
0
;
uint32
(
i
)
<
dims
;
i
++
{
shape
[
i
],
err
=
readGGUF
[
uint64
](
llm
,
rs
)
shape
[
i
],
err
=
readGGUF
[
uint64
](
llm
,
rs
)
if
err
!=
nil
{
if
err
!=
nil
{
...
@@ -236,13 +241,21 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
...
@@ -236,13 +241,21 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
alignment
=
32
alignment
=
32
}
}
offset
,
err
:=
rs
.
Seek
(
0
,
io
.
SeekCurrent
)
if
err
!=
nil
{
return
err
}
padding
:=
ggufPadding
(
offset
,
int64
(
alignment
))
llm
.
tensorOffset
=
uint64
(
offset
+
padding
)
for
_
,
tensor
:=
range
llm
.
tensors
{
for
_
,
tensor
:=
range
llm
.
tensors
{
offset
,
err
:=
rs
.
Seek
(
0
,
io
.
SeekCurrent
)
offset
,
err
:=
rs
.
Seek
(
0
,
io
.
SeekCurrent
)
if
err
!=
nil
{
if
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to get current offset: %w"
,
err
)
return
fmt
.
Errorf
(
"failed to get current offset: %w"
,
err
)
}
}
padding
:=
llm
.
p
adding
(
offset
,
int64
(
alignment
))
padding
:=
ggufP
adding
(
offset
,
int64
(
alignment
))
if
_
,
err
:=
rs
.
Seek
(
padding
,
io
.
SeekCurrent
);
err
!=
nil
{
if
_
,
err
:=
rs
.
Seek
(
padding
,
io
.
SeekCurrent
);
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to seek to init padding: %w"
,
err
)
return
fmt
.
Errorf
(
"failed to seek to init padding: %w"
,
err
)
}
}
...
@@ -261,12 +274,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
...
@@ -261,12 +274,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
return
t
,
err
return
t
,
err
}
}
func
writeGGUF
[
V
any
](
llm
*
gguf
,
w
io
.
Writer
,
t
uint32
,
v
V
)
error
{
func
writeGGUF
[
V
any
](
w
io
.
Writer
,
t
uint32
,
v
V
)
error
{
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
t
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
t
);
err
!=
nil
{
return
err
return
err
}
}
return
binary
.
Write
(
w
,
llm
.
ByteOrder
,
v
)
return
binary
.
Write
(
w
,
binary
.
LittleEndian
,
v
)
}
}
func
readGGUFV1String
(
llm
*
gguf
,
r
io
.
Reader
)
(
string
,
error
)
{
func
readGGUFV1String
(
llm
*
gguf
,
r
io
.
Reader
)
(
string
,
error
)
{
...
@@ -330,12 +343,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
...
@@ -330,12 +343,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
return
string
(
buf
),
nil
return
string
(
buf
),
nil
}
}
func
writeGGUFString
(
llm
*
gguf
,
w
io
.
Writer
,
s
string
)
error
{
func
writeGGUFString
(
w
io
.
Writer
,
s
string
)
error
{
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
ggufTypeString
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
ggufTypeString
);
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
uint64
(
len
(
s
)));
err
!=
nil
{
if
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
uint64
(
len
(
s
)));
err
!=
nil
{
return
err
return
err
}
}
...
@@ -476,223 +489,175 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
...
@@ -476,223 +489,175 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
return
a
,
nil
return
a
,
nil
}
}
func
writeGGUFArray
[
S
~
[]
E
,
E
any
](
llm
*
gguf
,
w
io
.
Writer
,
t
uint32
,
s
S
)
error
{
// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
ggufTypeArray
);
err
!=
nil
{
func
writeGGUFArray
[
S
~
[]
E
,
E
any
](
w
io
.
Writer
,
t
uint32
,
s
S
)
error
{
if
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
ggufTypeArray
);
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
t
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
t
);
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
uint64
(
len
(
s
)));
err
!=
nil
{
if
err
:=
binary
.
Write
(
w
,
binary
.
LittleEndian
,
uint64
(
len
(
s
)));
err
!=
nil
{
return
err
return
err
}
}
for
_
,
e
:=
range
s
{
return
binary
.
Write
(
w
,
binary
.
LittleEndian
,
s
)
if
err
:=
binary
.
Write
(
w
,
llm
.
ByteOrder
,
e
);
err
!=
nil
{
return
err
}
}
return
nil
}
var
ggufKVOrder
=
map
[
string
][]
string
{
"llama"
:
{
"general.architecture"
,
"general.name"
,
"llama.vocab_size"
,
"llama.context_length"
,
"llama.embedding_length"
,
"llama.block_count"
,
"llama.feed_forward_length"
,
"llama.attention.head_count"
,
"llama.attention.head_count_kv"
,
"llama.attention.layer_norm_rms_epsilon"
,
"llama.rope.freq_base"
,
"llama.rope.dimension_count"
,
"llama.expert_count"
,
"llama.expert_used_count"
,
"gemma.context_length"
,
"gemma.embedding_length"
,
"gemma.block_count"
,
"gemma.feed_forward_length"
,
"gemma.attention.head_count"
,
"gemma.attention.head_count_kv"
,
"gemma.attention.layer_norm_rms_epsilon"
,
"gemma.attention.key_length"
,
"gemma.attention.value_length"
,
"general.file_type"
,
"tokenizer.ggml.pre"
,
"tokenizer.ggml.model"
,
"tokenizer.ggml.tokens"
,
"tokenizer.ggml.scores"
,
"tokenizer.ggml.merges"
,
"tokenizer.ggml.token_type"
,
"tokenizer.ggml.bos_token_id"
,
"tokenizer.ggml.eos_token_id"
,
"tokenizer.ggml.unknown_token_id"
,
"tokenizer.ggml.padding_token_id"
,
"tokenizer.ggml.add_bos_token"
,
"tokenizer.ggml.add_eos_token"
,
"tokenizer.chat_template"
,
"bert.pooling_type"
,
},
}
}
func
(
llm
*
gguf
)
Encode
(
ws
io
.
WriteSeeker
,
kv
KV
,
tensors
[]
Tensor
)
error
{
func
WriteGGUF
(
ws
io
.
WriteSeeker
,
kv
KV
,
ts
[]
Tensor
)
error
{
switch
llm
.
Version
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
[]
byte
(
"GGUF"
));
err
!=
nil
{
case
3
:
llm
.
V3
.
NumTensor
=
uint64
(
len
(
tensors
))
llm
.
V3
.
NumKV
=
uint64
(
len
(
kv
))
default
:
return
fmt
.
Errorf
(
"not implemented: ggufv%d"
,
llm
.
Version
)
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
[]
byte
(
"GGUF"
));
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
llm
.
Version
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint32
(
3
)
);
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
llm
.
numTensor
(
));
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint64
(
len
(
ts
)
));
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
llm
.
numKV
(
));
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint64
(
len
(
kv
)
));
err
!=
nil
{
return
err
return
err
}
}
kvCheck
:=
make
(
map
[
string
]
bool
)
keys
:=
maps
.
Keys
(
kv
)
for
k
:=
range
kv
{
slices
.
Sort
(
keys
)
kvCheck
[
k
]
=
false
for
_
,
key
:=
range
keys
{
if
err
:=
ggufWriteKV
(
ws
,
key
,
kv
[
key
]);
err
!=
nil
{
return
err
}
}
}
for
_
,
k
:=
range
ggufKVOrder
[
"llama"
]
{
slices
.
SortFunc
(
ts
,
func
(
a
,
b
Tensor
)
int
{
v
,
ok
:=
kv
[
k
]
var
i
,
j
int
if
!
ok
{
if
n
,
err
:=
fmt
.
Sscanf
(
a
.
Name
,
"blk.%d"
,
&
i
);
err
!=
nil
||
n
!=
1
{
continue
return
cmp
.
Compare
(
a
.
Name
,
b
.
Name
)
}
else
if
n
,
err
:=
fmt
.
Sscanf
(
b
.
Name
,
"blk.%d"
,
&
j
);
err
!=
nil
||
n
!=
1
{
return
cmp
.
Compare
(
a
.
Name
,
b
.
Name
)
}
}
kvCheck
[
k
]
=
true
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
uint64
(
len
(
k
)));
err
!=
nil
{
return
cmp
.
Compare
(
i
,
j
)
})
var
s
uint64
for
_
,
t
:=
range
ts
{
t
.
Offset
=
s
if
err
:=
ggufWriteTensorInfo
(
ws
,
t
);
err
!=
nil
{
return
err
return
err
}
}
s
+=
t
.
Size
()
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
[]
byte
(
k
));
err
!=
nil
{
var
alignment
int64
=
32
for
_
,
t
:=
range
ts
{
if
err
:=
ggufWriteTensor
(
ws
,
t
,
alignment
);
err
!=
nil
{
return
err
return
err
}
}
}
var
err
error
return
nil
switch
v
:=
v
.
(
type
)
{
}
case
uint32
:
err
=
writeGGUF
(
llm
,
ws
,
ggufTypeUint32
,
v
)
case
float32
:
err
=
writeGGUF
(
llm
,
ws
,
ggufTypeFloat32
,
v
)
case
bool
:
err
=
writeGGUF
(
llm
,
ws
,
ggufTypeBool
,
v
)
case
string
:
err
=
writeGGUFString
(
llm
,
ws
,
v
)
case
[]
int32
:
err
=
writeGGUFArray
(
llm
,
ws
,
ggufTypeInt32
,
v
)
case
[]
uint32
:
err
=
writeGGUFArray
(
llm
,
ws
,
ggufTypeUint32
,
v
)
case
[]
float32
:
err
=
writeGGUFArray
(
llm
,
ws
,
ggufTypeFloat32
,
v
)
case
[]
string
:
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
ggufTypeArray
);
err
!=
nil
{
return
err
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
ggufTypeString
);
err
!=
nil
{
return
err
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
uint64
(
len
(
v
)));
err
!=
nil
{
return
err
}
for
_
,
e
:=
range
v
{
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
uint64
(
len
(
e
)));
err
!=
nil
{
return
err
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
[]
byte
(
e
));
err
!=
nil
{
func
ggufWriteKV
(
ws
io
.
WriteSeeker
,
k
string
,
v
any
)
error
{
return
err
slog
.
Debug
(
k
,
"type"
,
fmt
.
Sprintf
(
"%T"
,
v
))
}
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint64
(
len
(
k
)));
err
!=
nil
{
}
return
err
default
:
return
fmt
.
Errorf
(
"improper type for '%s'"
,
k
)
}
if
err
!=
nil
{
return
err
}
}
}
for
k
,
v
:=
range
kvCheck
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
[]
byte
(
k
));
err
!=
nil
{
if
!
v
{
return
err
return
fmt
.
Errorf
(
"Didn't know how to write kv %s"
,
k
)
}
}
}
for
_
,
tensor
:=
range
tensors
{
var
err
error
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
uint64
(
len
(
tensor
.
Name
)));
err
!=
nil
{
switch
v
:=
v
.
(
type
)
{
case
uint32
:
err
=
writeGGUF
(
ws
,
ggufTypeUint32
,
v
)
case
float32
:
err
=
writeGGUF
(
ws
,
ggufTypeFloat32
,
v
)
case
bool
:
err
=
writeGGUF
(
ws
,
ggufTypeBool
,
v
)
case
string
:
err
=
writeGGUFString
(
ws
,
v
)
case
[]
int32
:
err
=
writeGGUFArray
(
ws
,
ggufTypeInt32
,
v
)
case
[]
uint32
:
err
=
writeGGUFArray
(
ws
,
ggufTypeUint32
,
v
)
case
[]
float32
:
err
=
writeGGUFArray
(
ws
,
ggufTypeFloat32
,
v
)
case
[]
string
:
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
ggufTypeArray
);
err
!=
nil
{
return
err
return
err
}
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
[]
byte
(
tensor
.
Name
)
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
ggufTypeString
);
err
!=
nil
{
return
err
return
err
}
}
var
dims
int
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint64
(
len
(
v
)));
err
!=
nil
{
for
cnt
:=
range
len
(
tensor
.
Shape
)
{
if
tensor
.
Shape
[
cnt
]
>
0
{
dims
++
}
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
uint32
(
dims
));
err
!=
nil
{
return
err
return
err
}
}
for
i
:=
range
dims
{
for
_
,
e
:=
range
v
{
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
tensor
.
Shape
[
dims
-
1
-
i
]
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint64
(
len
(
e
))
);
err
!=
nil
{
return
err
return
err
}
}
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
tensor
.
Kind
);
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
[]
byte
(
e
));
err
!=
nil
{
return
err
return
err
}
}
}
default
:
return
fmt
.
Errorf
(
"improper type for '%s'"
,
k
)
}
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
tensor
.
Offset
);
err
!=
nil
{
return
err
return
err
}
}
func
ggufWriteTensorInfo
(
ws
io
.
WriteSeeker
,
t
Tensor
)
error
{
slog
.
Debug
(
t
.
Name
,
"kind"
,
t
.
Kind
,
"shape"
,
t
.
Shape
,
"offset"
,
t
.
Offset
)
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint64
(
len
(
t
.
Name
)));
err
!=
nil
{
return
err
}
}
var
alignment
int64
=
32
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
[]
byte
(
t
.
Name
));
err
!=
nil
{
for
_
,
tensor
:=
range
tensors
{
return
err
offset
,
err
:=
ws
.
Seek
(
0
,
io
.
SeekCurrent
)
}
if
err
!=
nil
{
return
err
}
padding
:=
llm
.
padding
(
offset
,
alignment
)
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
uint32
(
len
(
t
.
Shape
)));
err
!=
nil
{
if
err
:=
binary
.
Write
(
ws
,
llm
.
ByteOrder
,
bytes
.
Repeat
([]
byte
{
0
},
int
(
padding
)));
err
!=
nil
{
return
err
return
err
}
}
if
_
,
err
:=
tensor
.
WriteTo
(
ws
);
err
!=
nil
{
for
i
:=
range
len
(
t
.
Shape
)
{
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
t
.
Shape
[
len
(
t
.
Shape
)
-
i
-
1
]);
err
!=
nil
{
return
err
return
err
}
}
}
}
return
nil
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
t
.
Kind
);
err
!=
nil
{
return
err
}
return
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
t
.
Offset
)
}
func
ggufWriteTensor
(
ws
io
.
WriteSeeker
,
t
Tensor
,
alignment
int64
)
error
{
offset
,
err
:=
ws
.
Seek
(
0
,
io
.
SeekCurrent
)
if
err
!=
nil
{
return
err
}
if
err
:=
binary
.
Write
(
ws
,
binary
.
LittleEndian
,
bytes
.
Repeat
([]
byte
{
0
},
int
(
ggufPadding
(
offset
,
alignment
))));
err
!=
nil
{
return
err
}
_
,
err
=
t
.
WriteTo
(
ws
)
return
err
}
}
func
(
gguf
)
p
adding
(
offset
,
align
int64
)
int64
{
func
gguf
P
adding
(
offset
,
align
int64
)
int64
{
return
(
align
-
offset
%
align
)
%
align
return
(
align
-
offset
%
align
)
%
align
}
}
llm/memory_test.go
View file @
0ff42e84
...
@@ -2,7 +2,6 @@ package llm
...
@@ -2,7 +2,6 @@ package llm
import
(
import
(
"bytes"
"bytes"
"encoding/binary"
"fmt"
"fmt"
"os"
"os"
"testing"
"testing"
...
@@ -20,7 +19,6 @@ func TestEstimateGPULayers(t *testing.T) {
...
@@ -20,7 +19,6 @@ func TestEstimateGPULayers(t *testing.T) {
f
,
err
:=
os
.
CreateTemp
(
t
.
TempDir
(),
modelName
)
f
,
err
:=
os
.
CreateTemp
(
t
.
TempDir
(),
modelName
)
require
.
NoError
(
t
,
err
)
require
.
NoError
(
t
,
err
)
defer
f
.
Close
()
defer
f
.
Close
()
gguf
:=
NewGGUFV3
(
binary
.
LittleEndian
)
inputLayerCount
:=
5
inputLayerCount
:=
5
tensors
:=
[]
Tensor
{
tensors
:=
[]
Tensor
{
...
@@ -32,7 +30,7 @@ func TestEstimateGPULayers(t *testing.T) {
...
@@ -32,7 +30,7 @@ func TestEstimateGPULayers(t *testing.T) {
{
Name
:
"output.weight"
,
Kind
:
uint32
(
0
),
Offset
:
uint64
(
0
),
Shape
:
[]
uint64
{
1
,
1
,
1
,
1
},
WriterTo
:
bytes
.
NewReader
(
make
([]
byte
,
32
))},
{
Name
:
"output.weight"
,
Kind
:
uint32
(
0
),
Offset
:
uint64
(
0
),
Shape
:
[]
uint64
{
1
,
1
,
1
,
1
},
WriterTo
:
bytes
.
NewReader
(
make
([]
byte
,
32
))},
}
}
assert
.
Len
(
t
,
tensors
,
inputLayerCount
+
1
)
assert
.
Len
(
t
,
tensors
,
inputLayerCount
+
1
)
err
=
gguf
.
Encode
(
f
,
KV
{
err
=
WriteGGUF
(
f
,
KV
{
"general.architecture"
:
"llama"
,
"general.architecture"
:
"llama"
,
"general.name"
:
"name"
,
"general.name"
:
"name"
,
"llama.context_length"
:
uint32
(
32
),
"llama.context_length"
:
uint32
(
32
),
...
...
server/model.go
View file @
0ff42e84
...
@@ -81,112 +81,43 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
...
@@ -81,112 +81,43 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
return
layers
,
nil
return
layers
,
nil
}
}
func
extractFromZipFile
(
p
string
,
file
*
os
.
File
,
fn
func
(
api
.
ProgressResponse
))
error
{
func
parseFromZipFile
(
_
context
.
Context
,
f
*
os
.
File
,
digest
string
,
fn
func
(
api
.
ProgressResponse
))
(
layers
[]
*
layerGGML
,
err
error
)
{
stat
,
err
:=
file
.
Stat
()
fi
,
err
:=
f
.
Stat
()
if
err
!=
nil
{
return
err
}
r
,
err
:=
zip
.
NewReader
(
file
,
stat
.
Size
())
if
err
!=
nil
{
return
err
}
fn
(
api
.
ProgressResponse
{
Status
:
"unpacking model metadata"
})
for
_
,
f
:=
range
r
.
File
{
if
!
filepath
.
IsLocal
(
f
.
Name
)
{
return
fmt
.
Errorf
(
"%w: %s"
,
zip
.
ErrInsecurePath
,
f
.
Name
)
}
n
:=
filepath
.
Join
(
p
,
f
.
Name
)
if
err
:=
os
.
MkdirAll
(
filepath
.
Dir
(
n
),
0
o750
);
err
!=
nil
{
return
err
}
// TODO(mxyng): this should not write out all files to disk
outfile
,
err
:=
os
.
Create
(
n
)
if
err
!=
nil
{
return
err
}
defer
outfile
.
Close
()
infile
,
err
:=
f
.
Open
()
if
err
!=
nil
{
return
err
}
defer
infile
.
Close
()
if
_
,
err
=
io
.
Copy
(
outfile
,
infile
);
err
!=
nil
{
return
err
}
if
err
:=
outfile
.
Close
();
err
!=
nil
{
return
err
}
if
err
:=
infile
.
Close
();
err
!=
nil
{
return
err
}
}
return
nil
}
func
parseFromZipFile
(
_
context
.
Context
,
file
*
os
.
File
,
digest
string
,
fn
func
(
api
.
ProgressResponse
))
(
layers
[]
*
layerGGML
,
err
error
)
{
tempDir
,
err
:=
os
.
MkdirTemp
(
filepath
.
Dir
(
file
.
Name
()),
""
)
if
err
!=
nil
{
return
nil
,
err
}
defer
os
.
RemoveAll
(
tempDir
)
if
err
:=
extractFromZipFile
(
tempDir
,
file
,
fn
);
err
!=
nil
{
return
nil
,
err
}
mf
,
err
:=
convert
.
GetModelFormat
(
tempDir
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
params
,
err
:=
mf
.
GetParams
(
tempDir
)
r
,
err
:=
zip
.
NewReader
(
f
,
fi
.
Size
()
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
mArch
,
err
:=
mf
.
GetModelArch
(
""
,
tempDir
,
params
)
p
,
err
:=
os
.
MkdirTemp
(
filepath
.
Dir
(
f
.
Name
()),
""
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
defer
os
.
RemoveAll
(
p
)
fn
(
api
.
ProgressResponse
{
Status
:
"processing tensors"
})
if
err
:=
mArch
.
GetTensors
();
err
!=
nil
{
return
nil
,
err
}
if
err
:=
mArch
.
LoadVocab
();
err
!=
nil
{
return
nil
,
err
}
fn
(
api
.
ProgressResponse
{
Status
:
"converting model"
})
fn
(
api
.
ProgressResponse
{
Status
:
"converting model"
})
// TODO(mxyng): this should write directly into a layer
// TODO(mxyng): this should write directly into a layer
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
t
emp
,
err
:=
os
.
CreateTemp
(
tempDir
,
"fp16"
)
t
,
err
:=
os
.
CreateTemp
(
p
,
"fp16"
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
defer
t
emp
.
Close
()
defer
t
.
Close
()
defer
os
.
Remove
(
t
emp
.
Name
())
defer
os
.
Remove
(
t
.
Name
())
if
err
=
mArch
.
WriteGGUF
(
temp
);
err
!=
nil
{
fn
(
api
.
ProgressResponse
{
Status
:
"converting model"
})
if
err
:=
convert
.
Convert
(
convert
.
NewZipReader
(
r
,
p
,
32
<<
20
),
t
);
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
if
_
,
err
:=
t
emp
.
Seek
(
0
,
io
.
SeekStart
);
err
!=
nil
{
if
_
,
err
:=
t
.
Seek
(
0
,
io
.
SeekStart
);
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
layer
,
err
:=
NewLayer
(
t
emp
,
"application/vnd.ollama.image.model"
)
layer
,
err
:=
NewLayer
(
t
,
"application/vnd.ollama.image.model"
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
...
...
server/model_test.go
View file @
0ff42e84
package
server
package
server
import
(
import
(
"archive/zip"
"bytes"
"bytes"
"encoding/json"
"encoding/json"
"errors"
"fmt"
"fmt"
"io"
"os"
"os"
"path/filepath"
"path/filepath"
"slices"
"strings"
"testing"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp"
...
@@ -18,103 +13,6 @@ import (
...
@@ -18,103 +13,6 @@ import (
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/template"
)
)
func
createZipFile
(
t
*
testing
.
T
,
name
string
)
*
os
.
File
{
t
.
Helper
()
f
,
err
:=
os
.
CreateTemp
(
t
.
TempDir
(),
""
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
zf
:=
zip
.
NewWriter
(
f
)
defer
zf
.
Close
()
zh
,
err
:=
zf
.
CreateHeader
(
&
zip
.
FileHeader
{
Name
:
name
})
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
if
_
,
err
:=
io
.
Copy
(
zh
,
bytes
.
NewReader
([]
byte
(
""
)));
err
!=
nil
{
t
.
Fatal
(
err
)
}
return
f
}
func
TestExtractFromZipFile
(
t
*
testing
.
T
)
{
cases
:=
[]
struct
{
name
string
expect
[]
string
err
error
}{
{
name
:
"good"
,
expect
:
[]
string
{
"good"
},
},
{
name
:
strings
.
Join
([]
string
{
"path"
,
".."
,
"to"
,
"good"
},
string
(
os
.
PathSeparator
)),
expect
:
[]
string
{
filepath
.
Join
(
"to"
,
"good"
)},
},
{
name
:
strings
.
Join
([]
string
{
"path"
,
".."
,
"to"
,
".."
,
"good"
},
string
(
os
.
PathSeparator
)),
expect
:
[]
string
{
"good"
},
},
{
name
:
strings
.
Join
([]
string
{
"path"
,
"to"
,
".."
,
".."
,
"good"
},
string
(
os
.
PathSeparator
)),
expect
:
[]
string
{
"good"
},
},
{
name
:
strings
.
Join
([]
string
{
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
".."
,
"bad"
},
string
(
os
.
PathSeparator
)),
err
:
zip
.
ErrInsecurePath
,
},
{
name
:
strings
.
Join
([]
string
{
"path"
,
".."
,
".."
,
"to"
,
"bad"
},
string
(
os
.
PathSeparator
)),
err
:
zip
.
ErrInsecurePath
,
},
}
for
_
,
tt
:=
range
cases
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
f
:=
createZipFile
(
t
,
tt
.
name
)
defer
f
.
Close
()
tempDir
:=
t
.
TempDir
()
if
err
:=
extractFromZipFile
(
tempDir
,
f
,
func
(
api
.
ProgressResponse
)
{});
!
errors
.
Is
(
err
,
tt
.
err
)
{
t
.
Fatal
(
err
)
}
var
matches
[]
string
if
err
:=
filepath
.
Walk
(
tempDir
,
func
(
p
string
,
fi
os
.
FileInfo
,
err
error
)
error
{
if
err
!=
nil
{
return
err
}
if
!
fi
.
IsDir
()
{
matches
=
append
(
matches
,
p
)
}
return
nil
});
err
!=
nil
{
t
.
Fatal
(
err
)
}
var
actual
[]
string
for
_
,
match
:=
range
matches
{
rel
,
err
:=
filepath
.
Rel
(
tempDir
,
match
)
if
err
!=
nil
{
t
.
Error
(
err
)
}
actual
=
append
(
actual
,
rel
)
}
if
!
slices
.
Equal
(
actual
,
tt
.
expect
)
{
t
.
Fatalf
(
"expected %d files, got %d"
,
len
(
tt
.
expect
),
len
(
matches
))
}
})
}
}
func
readFile
(
t
*
testing
.
T
,
base
,
name
string
)
*
bytes
.
Buffer
{
func
readFile
(
t
*
testing
.
T
,
base
,
name
string
)
*
bytes
.
Buffer
{
t
.
Helper
()
t
.
Helper
()
...
...
server/routes_create_test.go
View file @
0ff42e84
...
@@ -2,7 +2,6 @@ package server
...
@@ -2,7 +2,6 @@ package server
import
(
import
(
"bytes"
"bytes"
"encoding/binary"
"encoding/json"
"encoding/json"
"fmt"
"fmt"
"io"
"io"
...
@@ -29,7 +28,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
...
@@ -29,7 +28,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
}
}
defer
f
.
Close
()
defer
f
.
Close
()
if
err
:=
llm
.
NewGGUFV3
(
binary
.
LittleEndian
)
.
Encode
(
f
,
kv
,
ti
);
err
!=
nil
{
if
err
:=
llm
.
WriteGGUF
(
f
,
kv
,
ti
);
err
!=
nil
{
t
.
Fatal
(
err
)
t
.
Fatal
(
err
)
}
}
...
...
server/sched_test.go
View file @
0ff42e84
...
@@ -3,7 +3,6 @@ package server
...
@@ -3,7 +3,6 @@ package server
import
(
import
(
"bytes"
"bytes"
"context"
"context"
"encoding/binary"
"fmt"
"fmt"
"log/slog"
"log/slog"
"os"
"os"
...
@@ -114,8 +113,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
...
@@ -114,8 +113,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
require
.
NoError
(
t
,
err
)
require
.
NoError
(
t
,
err
)
defer
f
.
Close
()
defer
f
.
Close
()
gguf
:=
llm
.
NewGGUFV3
(
binary
.
LittleEndian
)
require
.
NoError
(
t
,
llm
.
WriteGGUF
(
f
,
llm
.
KV
{
err
=
gguf
.
Encode
(
f
,
llm
.
KV
{
"general.architecture"
:
"llama"
,
"general.architecture"
:
"llama"
,
"general.name"
:
"name"
,
"general.name"
:
"name"
,
"llama.context_length"
:
uint32
(
32
),
"llama.context_length"
:
uint32
(
32
),
...
@@ -129,7 +127,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
...
@@ -129,7 +127,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
},
[]
llm
.
Tensor
{
},
[]
llm
.
Tensor
{
{
Name
:
"blk.0.attn.weight"
,
Kind
:
uint32
(
0
),
Offset
:
uint64
(
0
),
Shape
:
[]
uint64
{
1
,
1
,
1
,
1
},
WriterTo
:
bytes
.
NewReader
(
make
([]
byte
,
32
))},
{
Name
:
"blk.0.attn.weight"
,
Kind
:
uint32
(
0
),
Offset
:
uint64
(
0
),
Shape
:
[]
uint64
{
1
,
1
,
1
,
1
},
WriterTo
:
bytes
.
NewReader
(
make
([]
byte
,
32
))},
{
Name
:
"output.weight"
,
Kind
:
uint32
(
0
),
Offset
:
uint64
(
0
),
Shape
:
[]
uint64
{
1
,
1
,
1
,
1
},
WriterTo
:
bytes
.
NewReader
(
make
([]
byte
,
32
))},
{
Name
:
"output.weight"
,
Kind
:
uint32
(
0
),
Offset
:
uint64
(
0
),
Shape
:
[]
uint64
{
1
,
1
,
1
,
1
},
WriterTo
:
bytes
.
NewReader
(
make
([]
byte
,
32
))},
})
})
)
require
.
NoError
(
t
,
err
)
require
.
NoError
(
t
,
err
)
fname
:=
f
.
Name
()
fname
:=
f
.
Name
()
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment