Commit 5e9db9fb authored by Michael Yang

refactor convert

parent 6b252918
@@ -143,30 +143,6 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(a
 		return nil, err
 	}
 
-	mf, err := convert.GetModelFormat(tempDir)
-	if err != nil {
-		return nil, err
-	}
-
-	params, err := mf.GetParams(tempDir)
-	if err != nil {
-		return nil, err
-	}
-
-	mArch, err := mf.GetModelArch("", tempDir, params)
-	if err != nil {
-		return nil, err
-	}
-
-	fn(api.ProgressResponse{Status: "processing tensors"})
-	if err := mArch.GetTensors(); err != nil {
-		return nil, err
-	}
-
-	if err := mArch.LoadVocab(); err != nil {
-		return nil, err
-	}
-
 	fn(api.ProgressResponse{Status: "converting model"})
 	// TODO(mxyng): this should write directly into a layer
@@ -178,7 +154,7 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(a
 	defer temp.Close()
 	defer os.Remove(temp.Name())
 
-	if err = mArch.WriteGGUF(temp); err != nil {
+	if err := convert.Convert(tempDir, temp); err != nil {
 		return nil, err
 	}
...
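The net effect of these two hunks: the former multi-step pipeline (GetModelFormat, GetParams, GetModelArch, GetTensors, LoadVocab, WriteGGUF) collapses into a single convert.Convert call. A minimal sketch of the new call shape, assuming the module path github.com/ollama/ollama and inferring from the hunk that convert.Convert takes a source directory and a writable file (temp is an *os.File there; the path and variable names below are illustrative):

package main

import (
	"log"
	"os"

	"github.com/ollama/ollama/convert"
)

func main() {
	// Directory holding the unpacked source model, like tempDir in parseFromZipFile.
	srcDir := "/tmp/unpacked-model" // illustrative path

	// Destination for the converted GGUF; per the TODO above, this should
	// eventually stream directly into a layer instead of a temp file.
	temp, err := os.CreateTemp("", "*.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer temp.Close()
	defer os.Remove(temp.Name())

	// One call replaces the detect/params/arch/tensors/vocab/write sequence.
	if err := convert.Convert(srcDir, temp); err != nil {
		log.Fatal(err)
	}
}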
@@ -2,7 +2,6 @@ package server
 
 import (
 	"bytes"
-	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -20,7 +19,7 @@ import (
 var stream bool = false
 
-func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
+func createBinFile(t *testing.T, kv map[string]any, ti []*llm.Tensor) string {
 	t.Helper()
 
 	f, err := os.CreateTemp(t.TempDir(), "")
@@ -29,7 +28,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
 	}
 	defer f.Close()
 
-	if err := llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti); err != nil {
+	if err := llm.WriteGGUF(f, kv, ti); err != nil {
 		t.Fatal(err)
 	}
...
@@ -101,7 +101,7 @@ func TestGenerateChat(t *testing.T) {
 			"tokenizer.ggml.tokens":     []string{""},
 			"tokenizer.ggml.scores":     []float32{0},
 			"tokenizer.ggml.token_type": []int32{0},
-		}, []llm.Tensor{
+		}, []*llm.Tensor{
 			{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
@@ -149,7 +149,7 @@ func TestGenerateChat(t *testing.T) {
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 			"general.architecture": "bert",
 			"bert.pooling_type":    uint32(0),
-		}, []llm.Tensor{})),
+		}, []*llm.Tensor{})),
 		Stream: &stream,
 	})
@@ -399,7 +399,7 @@ func TestGenerate(t *testing.T) {
 			"tokenizer.ggml.tokens":     []string{""},
 			"tokenizer.ggml.scores":     []float32{0},
 			"tokenizer.ggml.token_type": []int32{0},
-		}, []llm.Tensor{
+		}, []*llm.Tensor{
 			{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
@@ -447,7 +447,7 @@ func TestGenerate(t *testing.T) {
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 			"general.architecture": "bert",
 			"bert.pooling_type":    uint32(0),
-		}, []llm.Tensor{})),
+		}, []*llm.Tensor{})),
 		Stream: &stream,
 	})
...
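Taken together, the test changes swap the hand-built GGUFV3 encoder for llm.WriteGGUF and pass tensors by pointer. A self-contained sketch of the updated fixture pattern, using only calls visible in the hunks (the helper name writeMinimalGGUF and the kv values are illustrative):

package server

import (
	"bytes"
	"os"
	"testing"

	"github.com/ollama/ollama/llm"
)

// writeMinimalGGUF is an illustrative variant of createBinFile: it writes a
// tiny GGUF fixture to a temp file and returns the path.
func writeMinimalGGUF(t *testing.T) string {
	t.Helper()

	f, err := os.CreateTemp(t.TempDir(), "")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	kv := llm.KV{
		"general.architecture":      "llama",
		"tokenizer.ggml.tokens":     []string{""},
		"tokenizer.ggml.scores":     []float32{0},
		"tokenizer.ggml.token_type": []int32{0},
	}

	// Note the pointer element type after this commit: []*llm.Tensor, not []llm.Tensor.
	tensors := []*llm.Tensor{
		{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
	}

	// llm.WriteGGUF replaces llm.NewGGUFV3(binary.LittleEndian).Encode(f, kv, ti).
	if err := llm.WriteGGUF(f, kv, tensors); err != nil {
		t.Fatal(err)
	}
	return f.Name()
}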
@@ -3,7 +3,6 @@ package server
 import (
 	"bytes"
 	"context"
-	"encoding/binary"
 	"fmt"
 	"log/slog"
 	"os"
@@ -114,8 +113,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 	require.NoError(t, err)
 	defer f.Close()
 
-	gguf := llm.NewGGUFV3(binary.LittleEndian)
-	err = gguf.Encode(f, llm.KV{
+	require.NoError(t, llm.WriteGGUF(f, llm.KV{
 		"general.architecture": "llama",
 		"general.name":         "name",
 		"llama.context_length": uint32(32),
@@ -126,10 +124,10 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 		"tokenizer.ggml.tokens":     []string{" "},
 		"tokenizer.ggml.scores":     []float32{0},
 		"tokenizer.ggml.token_type": []int32{0},
-	}, []llm.Tensor{
+	}, []*llm.Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
-	})
+	}))
 	require.NoError(t, err)
 	fname := f.Name()
...
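The dropped "encoding/binary" imports in both test files fall out of the same API change: callers no longer pick a byte order, which llm.WriteGGUF presumably fixes internally (the hunks alone don't show its body). Shape of the call before and after, as seen above:

// before: construct a GGUFV3 encoder with an explicit byte order
gguf := llm.NewGGUFV3(binary.LittleEndian)
err = gguf.Encode(f, kv, tensors) // tensors is []llm.Tensor

// after: a single helper; endianness handled inside (inferred)
err = llm.WriteGGUF(f, kv, tensors) // tensors is []*llm.Tensor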