Unverified Commit 6a6828bd authored by Michael Yang, committed by GitHub

Merge pull request #167 from jmorganca/decode-ggml

partial decode ggml bin for more info
parents 21e6197c fccf8d17
-package llama
+package llm

 import (
 	"bytes"
...
package llm

import (
	"fmt"
	"os"

	"github.com/jmorganca/ollama/api"
)

type LLM interface {
	Predict([]int, string, func(api.GenerateResponse)) error
	Embedding(string) ([]float64, error)
	Encode(string) []int
	Decode(...int) string
	SetOptions(api.Options)
	Close()
}

func New(model string, opts api.Options) (LLM, error) {
	if _, err := os.Stat(model); err != nil {
		return nil, err
	}

	f, err := os.Open(model)
	if err != nil {
		return nil, err
	}
	// the file is only needed to decode the header; newLlama reopens it by path
	defer f.Close()

	ggml, err := DecodeGGML(f, ModelFamilyLlama)
	if err != nil {
		return nil, err
	}

	switch ggml.ModelFamily {
	case ModelFamilyLlama:
		return newLlama(model, opts)
	default:
		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily)
	}
}
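Reviewer note (not part of the diff): the factory hides the backend choice behind the LLM interface, dispatching on the model family decoded from the GGML header. A minimal sketch of a caller, assuming a valid GGML file at a placeholder path and the api package's default options:

// sketch: exercise the new llm.New factory and the LLM interface
package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
)

func main() {
	modelPath := "/path/to/model.bin" // placeholder
	llmModel, err := llm.New(modelPath, api.DefaultOptions())
	if err != nil {
		log.Fatal(err)
	}
	defer llmModel.Close()

	// round-trip text through the tokenizer exposed by the interface
	tokens := llmModel.Encode("hello world")
	fmt.Println(llmModel.Decode(tokens...))
}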
-package llama
+package llm

 import (
 	"fmt"
...
@@ -19,7 +19,7 @@ import (
 	"strings"

 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/llama"
+	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/parser"
 	"github.com/jmorganca/ollama/vector"
 )
@@ -99,9 +99,14 @@ type LayerReader struct {
 }

 type ConfigV2 struct {
+	ModelFamily llm.ModelFamily `json:"model_family"`
+	ModelType   llm.ModelType   `json:"model_type"`
+	FileType    llm.FileType    `json:"file_type"`
+	RootFS      RootFS          `json:"rootfs"`
+
+	// required by spec
 	Architecture string `json:"architecture"`
 	OS           string `json:"os"`
-	RootFS       RootFS `json:"rootfs"`
 }

 type RootFS struct {
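Reviewer note (not part of the diff): the three new fields travel in the image config JSON next to the spec-required architecture/os fields. A sketch of how a populated config marshals, assuming llm.ModelFamily serializes as a string and running where ConfigV2 is defined (digest is a placeholder):

// sketch (illustrative): decoded GGML metadata ends up in the config blob
config := ConfigV2{Architecture: "amd64", OS: "linux"}
config.ModelFamily = llm.ModelFamilyLlama // ModelType/FileType likewise come from DecodeGGML
config.RootFS = RootFS{Type: "layers", DiffIDs: []string{"sha256:..."}}

if b, err := json.Marshal(config); err == nil {
	fmt.Println(string(b)) // {"model_family":"llama",...,"rootfs":{...}}
}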
@@ -246,6 +251,11 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}

+	config := ConfigV2{
+		Architecture: "amd64",
+		OS:           "linux",
+	}
+
 	var layers []*LayerReader
 	params := make(map[string][]string)
 	embed := EmbeddingParams{fn: fn, opts: api.DefaultOptions()}
@@ -284,6 +294,18 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			}
 			defer file.Close()

+			ggml, err := llm.DecodeGGML(file, llm.ModelFamilyLlama)
+			if err != nil {
+				return err
+			}
+
+			config.ModelFamily = ggml.ModelFamily
+			config.ModelType = ggml.ModelType
+			config.FileType = ggml.FileType
+
+			// reset the file
+			file.Seek(0, io.SeekStart)
+
 			l, err := CreateLayer(file)
 			if err != nil {
 				return fmt.Errorf("failed to create layer: %v", err)
@@ -292,6 +314,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			layers = append(layers, l)
 		}
 	}
+
 	if mf != nil {
 		log.Printf("manifest = %#v", mf)
 		for _, l := range mf.Layers {
@@ -321,7 +344,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			layers = append(layers, layer)
 		case "template", "system", "prompt":
 			fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
-			// remove the prompt layer if one exists
+			// remove the layer if one exists
 			mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
 			layers = removeLayerFromLayers(layers, mediaType)
@@ -383,7 +406,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 	// Create a layer for the config object
 	fn(api.ProgressResponse{Status: "creating config layer"})
-	cfg, err := createConfigLayer(digests)
+	cfg, err := createConfigLayer(config, digests)
 	if err != nil {
 		return err
 	}
@@ -430,13 +453,13 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
 	}

 	e.opts.EmbeddingOnly = true
-	llm, err := llama.New(e.model, e.opts)
+	llmModel, err := llm.New(e.model, e.opts)
 	if err != nil {
 		return nil, fmt.Errorf("load model to generate embeddings: %v", err)
 	}

 	defer func() {
-		if llm != nil {
-			llm.Close()
+		if llmModel != nil {
+			llmModel.Close()
 		}
 	}()
@@ -480,7 +503,7 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
 			Total:     len(data) - 1,
 			Completed: i,
 		})
-		embed, err := llm.Embedding(d)
+		embed, err := llmModel.Embedding(d)
 		if err != nil {
 			log.Printf("failed to generate embedding for '%s' line %d: %v", filePath, i+1, err)
 			continue
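Reviewer note (not part of the diff): the rename from llm to llmModel is not cosmetic. With the package now imported as llm, a local variable of the same name would shadow the import for the rest of the function. A sketch of the collision the rename avoids (hypothetical):

// sketch: a local variable named after the package shadows the import
llm, err := llm.New(e.model, e.opts) // compiles, but llm.New is unreachable afterwards
_ = llm                              // every later llm.X resolves to the variable, not the package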
@@ -676,7 +699,7 @@ func getLayerDigests(layers []*LayerReader) ([]string, error) {
 // CreateLayer creates a Layer object from a given file
 func CreateLayer(f io.ReadSeeker) (*LayerReader, error) {
 	digest, size := GetSHA256Digest(f)
-	f.Seek(0, 0)
+	f.Seek(0, io.SeekStart)

 	layer := &LayerReader{
 		Layer: Layer{
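Reviewer note (not part of the diff): same rewind idea as above; GetSHA256Digest reads the stream to the end, so the layer body must be rewound before it is stored. io.SeekStart is just the named constant for whence 0. A sketch with an in-memory reader (payload is a placeholder):

// sketch: hashing consumes the reader, so rewind before reusing it
r := bytes.NewReader(payload) // payload []byte, placeholder
digest, size := GetSHA256Digest(r)
r.Seek(0, io.SeekStart) // same whence as Seek(0, 0), but self-documenting
log.Printf("digest=%v size=%v", digest, size)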
@@ -768,10 +791,6 @@ func DeleteModel(name string) error {
 		return err
 	}

-	if err != nil {
-		return err
-	}
-
 	// only delete the files which are still in the deleteMap
 	for k, v := range deleteMap {
 		if v {
@@ -970,15 +989,10 @@ func pullModelManifest(mp ModelPath, regOpts *RegistryOptions) (*ManifestV2, err
 	return m, err
 }

-func createConfigLayer(layers []string) (*LayerReader, error) {
-	// TODO change architecture and OS
-	config := ConfigV2{
-		Architecture: "arm64",
-		OS:           "linux",
-		RootFS: RootFS{
-			Type:    "layers",
-			DiffIDs: layers,
-		},
+func createConfigLayer(config ConfigV2, layers []string) (*LayerReader, error) {
+	config.RootFS = RootFS{
+		Type:    "layers",
+		DiffIDs: layers,
 	}

 	configJSON, err := json.Marshal(config)
...
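Reviewer note (not part of the diff): after the refactor the caller owns the ConfigV2, and createConfigLayer only attaches the RootFS before marshaling; this is what lets CreateModel thread the decoded GGML metadata through to the config blob. A sketch of the new call shape (digest values are placeholders):

// sketch: caller-supplied config, layer digests attached inside
config := ConfigV2{Architecture: "amd64", OS: "linux"}
digests := []string{"sha256:aaaa...", "sha256:bbbb..."} // placeholders
cfg, err := createConfigLayer(config, digests)
if err != nil {
	return err
}
layers = append(layers, cfg)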
@@ -21,14 +21,14 @@ import (
 	"gonum.org/v1/gonum/mat"

 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/llama"
+	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/vector"
 )

 var loaded struct {
 	mu sync.Mutex

-	llm *llama.LLM
+	llm llm.LLM

 	Embeddings []vector.Embedding
 	expireAt   time.Time
@@ -63,11 +63,16 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 			loaded.Embeddings = model.Embeddings
 		}

-		llm, err := llama.New(model.ModelPath, opts)
+		llmModel, err := llm.New(model.ModelPath, opts)
 		if err != nil {
 			return err
 		}

+		// set cache values before modifying opts
+		loaded.llm = llmModel
+		loaded.digest = model.Digest
+		loaded.options = opts
+
 		if opts.NumKeep < 0 {
 			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
 			if err != nil {
@@ -79,15 +84,13 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 				return err
 			}

-			tokensWithSystem := llm.Encode(promptWithSystem)
-			tokensNoSystem := llm.Encode(promptNoSystem)
+			tokensWithSystem := llmModel.Encode(promptWithSystem)
+			tokensNoSystem := llmModel.Encode(promptNoSystem)

-			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+
+			llmModel.SetOptions(opts)
 		}
-
-		loaded.llm = llm
-		loaded.digest = model.Digest
-		loaded.options = opts
 	}

 	loaded.expireAt = time.Now().Add(sessionDuration)
...
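Reviewer note (not part of the diff): the NumKeep change pins exactly the system-prompt tokens in the context window by encoding the template with and without the system text and keeping the difference plus one, then pushing it into the model via SetOptions so the cached loaded.options stays consistent. A sketch with made-up token counts:

// sketch: NumKeep = system-prompt token overhead (counts illustrative)
tokensWithSystem := llmModel.Encode(promptWithSystem) // e.g. 40 tokens
tokensNoSystem := llmModel.Encode(promptNoSystem)     // e.g. 12 tokens
opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1 // 40 - 12 + 1 = 29
llmModel.SetOptions(opts) // keep the loaded model and cached options in sync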