"vscode:/vscode.git/clone" did not exist on "619b9658e286ed10560a13f80084e286a6d85956"
Unverified Commit 0ff42e84 authored by Michael Yang's avatar Michael Yang Committed by GitHub
Browse files

Merge pull request #4756 from ollama/mxyng/convert2

refactor convert
parents 3e614260 d8e2664c
package convert
import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
type torchWriterTo struct {
t *llm.Tensor
params *Params
bo ByteOrder
storage pytorch.StorageInterface
repacker func(string, []float32, []uint64) ([]float32, error)
}
type TorchFormat struct{}
func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
slog.Debug("getting torch tensors")
var files []string
if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
files = append(files, pt...)
} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
files = append(files, pt...)
}
var offset uint64
var tensors []llm.Tensor
for _, fn := range files {
m, err := pytorch.Load(fn)
if err != nil {
slog.Error(fmt.Sprintf("error unpickling: %q", err))
return []llm.Tensor{}, err
}
for _, k := range m.(*types.Dict).Keys() {
if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
continue
}
t, _ := m.(*types.Dict).Get(k)
tshape := t.(*pytorch.Tensor).Size
var size uint64
var kind uint32
switch len(tshape) {
case 0:
continue
case 1:
// convert to float32
kind = 0
size = uint64(tshape[0] * 4)
case 2:
// convert to float16
kind = 1
size = uint64(tshape[0] * tshape[1] * 2)
}
ggufName, err := tf.GetLayerName(k.(string))
if err != nil {
slog.Error(err.Error())
return nil, err
}
slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
shape := []uint64{0, 0, 0, 0}
for i := range tshape {
shape[i] = uint64(tshape[i])
}
tensor := llm.Tensor{
Name: ggufName,
Kind: kind,
Offset: offset, // calculate the offset
Shape: shape,
}
tensor.WriterTo = torchWriterTo{
t: &tensor,
params: params,
bo: params.ByteOrder,
storage: t.(*pytorch.Tensor).Source,
}
tensors = append(tensors, tensor)
offset += size
}
}
return tensors, nil
}
func getAltParams(dirpath string) (*Params, error) {
f, err := os.Open(filepath.Join(dirpath, "params.json"))
if err != nil {
slog.Error("no params.json")
return nil, err
}
defer f.Close()
type TorchParams struct {
HiddenSize int `json:"dim"`
AttentionHeads int `json:"n_heads"`
KeyValHeads int `json:"n_kv_heads"`
HiddenLayers int `json:"n_layers"`
RopeTheta float64 `json:"rope_theta"`
NormEPS float64 `json:"norm_eps"`
}
var tparams TorchParams
d := json.NewDecoder(f)
err = d.Decode(&tparams)
if err != nil {
return nil, err
}
params := &Params{
Architectures: []string{"LlamaForCausalLM"},
HiddenSize: tparams.HiddenSize,
AttentionHeads: tparams.AttentionHeads,
KeyValHeads: tparams.KeyValHeads,
HiddenLayers: tparams.HiddenLayers,
NormEPS: tparams.NormEPS,
}
switch {
case tparams.RopeTheta == 1000000:
// Codellama
params.ContextSize = 16384
case tparams.NormEPS == 1e-06:
// llama2
slog.Debug("Found llama2 - setting context size to 4096")
params.ContextSize = 4096
default:
params.ContextSize = 2048
}
params.ByteOrder = binary.LittleEndian
return params, nil
}
func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
f, err := os.Open(filepath.Join(dirpath, "config.json"))
if err != nil {
if os.IsNotExist(err) {
// try params.json instead
return getAltParams(dirpath)
} else {
return nil, err
}
}
var params Params
d := json.NewDecoder(f)
err = d.Decode(&params)
if err != nil {
return nil, err
}
params.ByteOrder = binary.LittleEndian
return &params, nil
}
func (m *TorchFormat) GetLayerName(n string) (string, error) {
directMap := map[string]string{
"tok_embeddings.weight": "token_embd.weight",
"output.weight": "output.weight",
"norm.weight": "output_norm.weight",
"rope.freqs": "rope_freqs.weight",
"model.embed_tokens.weight": "token_embd.weight",
"lm_head.weight": "output.weight",
"model.norm.weight": "output_norm.weight",
}
lMap := map[string]string{
"layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight",
"layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight",
"layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight",
"layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight",
"layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight",
"layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight",
"layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight",
"layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight",
"layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight",
"layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight",
"model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
"model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
"model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
"model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
"model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
"model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
"model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
"model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
}
v, ok := directMap[n]
if ok {
return v, nil
}
// quick hack to rename the layers to gguf format
for k, v := range lMap {
re := regexp.MustCompile(k)
newName := re.ReplaceAllString(n, v)
if newName != n {
return newName, nil
}
}
return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
}
func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
var f32s []float32
switch s := r.storage.(type) {
case *pytorch.FloatStorage:
f32s = s.Data
case *pytorch.HalfStorage:
f32s = s.Data
case *pytorch.BFloat16Storage:
f32s = s.Data
default:
return 0, fmt.Errorf("unknown data type: %T", s)
}
if r.repacker != nil {
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
if err != nil {
return 0, err
}
}
switch r.t.Kind {
case 0:
return 0, binary.Write(w, r.bo, f32s)
case 1:
f16s := make([]uint16, len(f32s))
for i := range f32s {
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
}
return 0, binary.Write(w, r.bo, f16s)
default:
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
}
}
func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
switch len(params.Architectures) {
case 0:
return nil, fmt.Errorf("No architecture specified to convert")
case 1:
switch params.Architectures[0] {
case "LlamaForCausalLM":
return &LlamaModel{
ModelData{
Name: name,
Path: dirPath,
Params: params,
Format: m,
},
}, nil
default:
return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
}
}
return nil, fmt.Errorf("Unknown error")
}
......@@ -36,6 +36,8 @@ type ggla struct {
kv KV
tensors []*Tensor
tensorOffset uint64
}
func newGGLA(container *containerGGLA) *ggla {
......@@ -50,7 +52,10 @@ func (llm *ggla) KV() KV {
}
func (llm *ggla) Tensors() Tensors {
return llm.tensors
return Tensors{
Items: llm.tensors,
Offset: llm.tensorOffset,
}
}
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
......@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
}
llm.kv["alpha"] = alpha
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
llm.tensorOffset = uint64(offset)
for {
var dims uint32
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
......
......@@ -112,11 +112,14 @@ func (kv KV) ChatTemplate() string {
return s
}
type Tensors []*Tensor
type Tensors struct {
Items []*Tensor
Offset uint64
}
func (ts Tensors) Layers() map[string]Layer {
layers := make(map[string]Layer)
for _, t := range ts {
for _, t := range ts.Items {
parts := strings.Split(t.Name, ".")
if parts[0] == "blk" {
// join first and second part, e.g. blk.%d
......
......@@ -2,11 +2,16 @@ package llm
import (
"bytes"
"cmp"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"slices"
"strings"
"golang.org/x/exp/maps"
)
type containerGGUF struct {
......@@ -88,7 +93,8 @@ type gguf struct {
kv KV
tensors []*Tensor
parameters uint64
parameters uint64
tensorOffset uint64
scratch [16 << 10]byte
}
......@@ -100,16 +106,15 @@ func newGGUF(container *containerGGUF) *gguf {
}
}
func NewGGUFV3(bo binary.ByteOrder) *gguf {
return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
}
func (llm *gguf) KV() KV {
return llm.kv
}
func (llm *gguf) Tensors() Tensors {
return llm.tensors
return Tensors{
Items: llm.tensors,
Offset: llm.tensorOffset,
}
}
func (llm *gguf) numTensor() uint64 {
......@@ -199,7 +204,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
return fmt.Errorf("failed to read tensor dimensions: %w", err)
}
shape := [4]uint64{1, 1, 1, 1}
shape := make([]uint64, dims)
for i := 0; uint32(i) < dims; i++ {
shape[i], err = readGGUF[uint64](llm, rs)
if err != nil {
......@@ -236,13 +241,21 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
alignment = 32
}
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
padding := ggufPadding(offset, int64(alignment))
llm.tensorOffset = uint64(offset + padding)
for _, tensor := range llm.tensors {
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return fmt.Errorf("failed to get current offset: %w", err)
}
padding := llm.padding(offset, int64(alignment))
padding := ggufPadding(offset, int64(alignment))
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
return fmt.Errorf("failed to seek to init padding: %w", err)
}
......@@ -261,12 +274,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
return t, err
}
func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
func writeGGUF[V any](w io.Writer, t uint32, v V) error {
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
return err
}
return binary.Write(w, llm.ByteOrder, v)
return binary.Write(w, binary.LittleEndian, v)
}
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
......@@ -330,12 +343,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
return string(buf), nil
}
func writeGGUFString(llm *gguf, w io.Writer, s string) error {
if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
func writeGGUFString(w io.Writer, s string) error {
if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
return err
}
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
return err
}
......@@ -476,223 +489,175 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
return a, nil
}
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
return err
}
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
return err
}
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
return err
}
for _, e := range s {
if err := binary.Write(w, llm.ByteOrder, e); err != nil {
return err
}
}
return nil
}
var ggufKVOrder = map[string][]string{
"llama": {
"general.architecture",
"general.name",
"llama.vocab_size",
"llama.context_length",
"llama.embedding_length",
"llama.block_count",
"llama.feed_forward_length",
"llama.attention.head_count",
"llama.attention.head_count_kv",
"llama.attention.layer_norm_rms_epsilon",
"llama.rope.freq_base",
"llama.rope.dimension_count",
"llama.expert_count",
"llama.expert_used_count",
"gemma.context_length",
"gemma.embedding_length",
"gemma.block_count",
"gemma.feed_forward_length",
"gemma.attention.head_count",
"gemma.attention.head_count_kv",
"gemma.attention.layer_norm_rms_epsilon",
"gemma.attention.key_length",
"gemma.attention.value_length",
"general.file_type",
"tokenizer.ggml.pre",
"tokenizer.ggml.model",
"tokenizer.ggml.tokens",
"tokenizer.ggml.scores",
"tokenizer.ggml.merges",
"tokenizer.ggml.token_type",
"tokenizer.ggml.bos_token_id",
"tokenizer.ggml.eos_token_id",
"tokenizer.ggml.unknown_token_id",
"tokenizer.ggml.padding_token_id",
"tokenizer.ggml.add_bos_token",
"tokenizer.ggml.add_eos_token",
"tokenizer.chat_template",
"bert.pooling_type",
},
return binary.Write(w, binary.LittleEndian, s)
}
func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
switch llm.Version {
case 3:
llm.V3.NumTensor = uint64(len(tensors))
llm.V3.NumKV = uint64(len(kv))
default:
return fmt.Errorf("not implemented: ggufv%d", llm.Version)
}
if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
return err
}
if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
if err := binary.Write(ws, binary.LittleEndian, uint32(3)); err != nil {
return err
}
if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(ts))); err != nil {
return err
}
if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(kv))); err != nil {
return err
}
kvCheck := make(map[string]bool)
for k := range kv {
kvCheck[k] = false
keys := maps.Keys(kv)
slices.Sort(keys)
for _, key := range keys {
if err := ggufWriteKV(ws, key, kv[key]); err != nil {
return err
}
}
for _, k := range ggufKVOrder["llama"] {
v, ok := kv[k]
if !ok {
continue
slices.SortFunc(ts, func(a, b Tensor) int {
var i, j int
if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
return cmp.Compare(a.Name, b.Name)
} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
return cmp.Compare(a.Name, b.Name)
}
kvCheck[k] = true
if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
return cmp.Compare(i, j)
})
var s uint64
for _, t := range ts {
t.Offset = s
if err := ggufWriteTensorInfo(ws, t); err != nil {
return err
}
s += t.Size()
}
if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
var alignment int64 = 32
for _, t := range ts {
if err := ggufWriteTensor(ws, t, alignment); err != nil {
return err
}
}
var err error
switch v := v.(type) {
case uint32:
err = writeGGUF(llm, ws, ggufTypeUint32, v)
case float32:
err = writeGGUF(llm, ws, ggufTypeFloat32, v)
case bool:
err = writeGGUF(llm, ws, ggufTypeBool, v)
case string:
err = writeGGUFString(llm, ws, v)
case []int32:
err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
case []uint32:
err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
case []float32:
err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
case []string:
if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
return err
}
if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
return err
}
if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
return err
}
for _, e := range v {
if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
return err
}
return nil
}
if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
return err
}
}
default:
return fmt.Errorf("improper type for '%s'", k)
}
if err != nil {
return err
}
func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
slog.Debug(k, "type", fmt.Sprintf("%T", v))
if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
return err
}
for k, v := range kvCheck {
if !v {
return fmt.Errorf("Didn't know how to write kv %s", k)
}
if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
return err
}
for _, tensor := range tensors {
if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
var err error
switch v := v.(type) {
case uint32:
err = writeGGUF(ws, ggufTypeUint32, v)
case float32:
err = writeGGUF(ws, ggufTypeFloat32, v)
case bool:
err = writeGGUF(ws, ggufTypeBool, v)
case string:
err = writeGGUFString(ws, v)
case []int32:
err = writeGGUFArray(ws, ggufTypeInt32, v)
case []uint32:
err = writeGGUFArray(ws, ggufTypeUint32, v)
case []float32:
err = writeGGUFArray(ws, ggufTypeFloat32, v)
case []string:
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
return err
}
if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
return err
}
var dims int
for cnt := range len(tensor.Shape) {
if tensor.Shape[cnt] > 0 {
dims++
}
}
if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for i := range dims {
if err := binary.Write(ws, llm.ByteOrder, tensor.Shape[dims-1-i]); err != nil {
for _, e := range v {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
return err
}
}
if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
return err
if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
return err
}
}
default:
return fmt.Errorf("improper type for '%s'", k)
}
if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
return err
}
return err
}
func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
return err
}
var alignment int64 = 32
for _, tensor := range tensors {
offset, err := ws.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
return err
}
padding := llm.padding(offset, alignment)
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
return err
}
if _, err := tensor.WriteTo(ws); err != nil {
for i := range len(t.Shape) {
if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
return err
}
}
return nil
if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
return err
}
return binary.Write(ws, binary.LittleEndian, t.Offset)
}
func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
offset, err := ws.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(offset, alignment)))); err != nil {
return err
}
_, err = t.WriteTo(ws)
return err
}
func (gguf) padding(offset, align int64) int64 {
func ggufPadding(offset, align int64) int64 {
return (align - offset%align) % align
}
......@@ -2,7 +2,6 @@ package llm
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"testing"
......@@ -20,7 +19,6 @@ func TestEstimateGPULayers(t *testing.T) {
f, err := os.CreateTemp(t.TempDir(), modelName)
require.NoError(t, err)
defer f.Close()
gguf := NewGGUFV3(binary.LittleEndian)
inputLayerCount := 5
tensors := []Tensor{
......@@ -32,7 +30,7 @@ func TestEstimateGPULayers(t *testing.T) {
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
}
assert.Len(t, tensors, inputLayerCount+1)
err = gguf.Encode(f, KV{
err = WriteGGUF(f, KV{
"general.architecture": "llama",
"general.name": "name",
"llama.context_length": uint32(32),
......
......@@ -81,112 +81,43 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
return layers, nil
}
func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error {
stat, err := file.Stat()
if err != nil {
return err
}
r, err := zip.NewReader(file, stat.Size())
if err != nil {
return err
}
fn(api.ProgressResponse{Status: "unpacking model metadata"})
for _, f := range r.File {
if !filepath.IsLocal(f.Name) {
return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name)
}
n := filepath.Join(p, f.Name)
if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil {
return err
}
// TODO(mxyng): this should not write out all files to disk
outfile, err := os.Create(n)
if err != nil {
return err
}
defer outfile.Close()
infile, err := f.Open()
if err != nil {
return err
}
defer infile.Close()
if _, err = io.Copy(outfile, infile); err != nil {
return err
}
if err := outfile.Close(); err != nil {
return err
}
if err := infile.Close(); err != nil {
return err
}
}
return nil
}
func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
if err != nil {
return nil, err
}
defer os.RemoveAll(tempDir)
if err := extractFromZipFile(tempDir, file, fn); err != nil {
return nil, err
}
mf, err := convert.GetModelFormat(tempDir)
func parseFromZipFile(_ context.Context, f *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
fi, err := f.Stat()
if err != nil {
return nil, err
}
params, err := mf.GetParams(tempDir)
r, err := zip.NewReader(f, fi.Size())
if err != nil {
return nil, err
}
mArch, err := mf.GetModelArch("", tempDir, params)
p, err := os.MkdirTemp(filepath.Dir(f.Name()), "")
if err != nil {
return nil, err
}
fn(api.ProgressResponse{Status: "processing tensors"})
if err := mArch.GetTensors(); err != nil {
return nil, err
}
if err := mArch.LoadVocab(); err != nil {
return nil, err
}
defer os.RemoveAll(p)
fn(api.ProgressResponse{Status: "converting model"})
// TODO(mxyng): this should write directly into a layer
// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
temp, err := os.CreateTemp(tempDir, "fp16")
t, err := os.CreateTemp(p, "fp16")
if err != nil {
return nil, err
}
defer temp.Close()
defer os.Remove(temp.Name())
defer t.Close()
defer os.Remove(t.Name())
if err = mArch.WriteGGUF(temp); err != nil {
fn(api.ProgressResponse{Status: "converting model"})
if err := convert.Convert(convert.NewZipReader(r, p, 32<<20), t); err != nil {
return nil, err
}
if _, err := temp.Seek(0, io.SeekStart); err != nil {
if _, err := t.Seek(0, io.SeekStart); err != nil {
return nil, err
}
layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
layer, err := NewLayer(t, "application/vnd.ollama.image.model")
if err != nil {
return nil, err
}
......
package server
import (
"archive/zip"
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"slices"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
......@@ -18,103 +13,6 @@ import (
"github.com/ollama/ollama/template"
)
func createZipFile(t *testing.T, name string) *os.File {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "")
if err != nil {
t.Fatal(err)
}
zf := zip.NewWriter(f)
defer zf.Close()
zh, err := zf.CreateHeader(&zip.FileHeader{Name: name})
if err != nil {
t.Fatal(err)
}
if _, err := io.Copy(zh, bytes.NewReader([]byte(""))); err != nil {
t.Fatal(err)
}
return f
}
func TestExtractFromZipFile(t *testing.T) {
cases := []struct {
name string
expect []string
err error
}{
{
name: "good",
expect: []string{"good"},
},
{
name: strings.Join([]string{"path", "..", "to", "good"}, string(os.PathSeparator)),
expect: []string{filepath.Join("to", "good")},
},
{
name: strings.Join([]string{"path", "..", "to", "..", "good"}, string(os.PathSeparator)),
expect: []string{"good"},
},
{
name: strings.Join([]string{"path", "to", "..", "..", "good"}, string(os.PathSeparator)),
expect: []string{"good"},
},
{
name: strings.Join([]string{"..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "..", "bad"}, string(os.PathSeparator)),
err: zip.ErrInsecurePath,
},
{
name: strings.Join([]string{"path", "..", "..", "to", "bad"}, string(os.PathSeparator)),
err: zip.ErrInsecurePath,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
f := createZipFile(t, tt.name)
defer f.Close()
tempDir := t.TempDir()
if err := extractFromZipFile(tempDir, f, func(api.ProgressResponse) {}); !errors.Is(err, tt.err) {
t.Fatal(err)
}
var matches []string
if err := filepath.Walk(tempDir, func(p string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
if !fi.IsDir() {
matches = append(matches, p)
}
return nil
}); err != nil {
t.Fatal(err)
}
var actual []string
for _, match := range matches {
rel, err := filepath.Rel(tempDir, match)
if err != nil {
t.Error(err)
}
actual = append(actual, rel)
}
if !slices.Equal(actual, tt.expect) {
t.Fatalf("expected %d files, got %d", len(tt.expect), len(matches))
}
})
}
}
func readFile(t *testing.T, base, name string) *bytes.Buffer {
t.Helper()
......
......@@ -2,7 +2,6 @@ package server
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
......@@ -29,7 +28,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
}
defer f.Close()
if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil {
if err := llm.WriteGGUF(f, kv, ti); err != nil {
t.Fatal(err)
}
......
......@@ -3,7 +3,6 @@ package server
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"log/slog"
"os"
......@@ -114,8 +113,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
require.NoError(t, err)
defer f.Close()
gguf := llm.NewGGUFV3(binary.LittleEndian)
err = gguf.Encode(f, llm.KV{
require.NoError(t, llm.WriteGGUF(f, llm.KV{
"general.architecture": "llama",
"general.name": "name",
"llama.context_length": uint32(32),
......@@ -129,7 +127,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
}, []llm.Tensor{
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
})
}))
require.NoError(t, err)
fname := f.Name()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment