Unverified commit 33ee7168, authored by Daniel Hiltgen, committed by GitHub

Add experimental MLX backend and engine with imagegen support (#13648)



* WIP - MLX backend with gemma3

* MLX: add cmake and go tag build toggles

To build the new MLX backend code:
  cmake --preset MLX
  cmake --build --preset MLX --parallel
  cmake --install build --component MLX
  go build -tags mlx .

Note: the main.go entrypoint for the MLX engine will change in a follow-up commit.
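
The `-tags mlx` flag works because MLX-dependent Go files are gated behind a build constraint. A minimal sketch of the pattern (package and function names are invented for illustration, not taken from this commit):

  //go:build mlx

  package mlx

  // Available reports whether this binary was compiled with MLX support.
  func Available() bool { return true }

A sibling file constrained with `//go:build !mlx` can return false, so code that checks `mlx.Available()` compiles with or without the tag.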

* add experimental image generation runtime

* add experimental image generation runtime

* MLX: wire up cuda build for linux

* MLX: get dependencies correct and dedup

This is still too large for a unified GitHub artifact, but is now "correct" for the mlx_cuda_v13 directory.

* fix relative link bug in dedup

* Add darwin build and readme

* add go build tag for mlx dependent code and wire up build_darwin.sh

* lint cleanup

* macos: build mlx for x86

This will be CPU only.

* cuda build instructions and fix drift from mlx bump

* stale comment

* Delete agent helper doc

* Clean up readme.md

* Revise README for tokenizer clarity and details

Updated README to clarify tokenizer functionality and removed correctness section.

---------
Co-authored-by: jmorganca <jmorganca@gmail.com>
parent 34d0c55e
@@ -2,6 +2,22 @@
 cmake_minimum_required(VERSION 3.21)
 project(Ollama C CXX)
 
+# Handle cross-compilation on macOS: when CMAKE_OSX_ARCHITECTURES is set to a
+# single architecture different from the host, override CMAKE_SYSTEM_PROCESSOR
+# to match. This is necessary because CMAKE_SYSTEM_PROCESSOR defaults to the
+# host architecture, but downstream projects (like MLX) use it to detect the
+# target architecture.
+if(CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES ";")
+  # Single architecture specified
+  if(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+    message(STATUS "Cross-compiling for x86_64: overriding CMAKE_SYSTEM_PROCESSOR from ${CMAKE_SYSTEM_PROCESSOR} to x86_64")
+    set(CMAKE_SYSTEM_PROCESSOR "x86_64")
+  elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
+    message(STATUS "Cross-compiling for arm64: overriding CMAKE_SYSTEM_PROCESSOR from ${CMAKE_SYSTEM_PROCESSOR} to arm64")
+    set(CMAKE_SYSTEM_PROCESSOR "arm64")
+  endif()
+endif()
+
 include(CheckLanguage)
 include(GNUInstallDirs)
@@ -12,7 +28,7 @@
 set(BUILD_SHARED_LIBS ON)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_EXTENSIONS ON) # Recent versions of MLX require gnu++17 extensions to compile properly
 set(GGML_BUILD ON)
 set(GGML_SHARED ON)
@@ -147,14 +163,48 @@ if(CMAKE_HIP_COMPILER)
 endif()
 endif()
 
-find_package(Vulkan)
-if(Vulkan_FOUND)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan)
-  install(TARGETS ggml-vulkan
-    RUNTIME_DEPENDENCIES
-    PRE_INCLUDE_REGEXES vulkan
-    PRE_EXCLUDE_REGEXES ".*"
-    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
-    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
-  )
-endif()
+if(NOT APPLE)
+  find_package(Vulkan)
+  if(Vulkan_FOUND)
+    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan)
+    install(TARGETS ggml-vulkan
+      RUNTIME_DEPENDENCIES
+      PRE_INCLUDE_REGEXES vulkan
+      PRE_EXCLUDE_REGEXES ".*"
+      RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
+      LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
+    )
+  endif()
+endif()
+
+option(MLX_ENGINE "Enable MLX backend" OFF)
+if(MLX_ENGINE)
+  message(STATUS "Setting up MLX (this takes a while...)")
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/x/ml/backend/mlx)
+  # Find CUDA toolkit if MLX is built with CUDA support
+  find_package(CUDAToolkit)
+  install(TARGETS mlx mlxc
+    RUNTIME_DEPENDENCIES
+    DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
+    PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc cudnn nccl
+    PRE_EXCLUDE_REGEXES ".*"
+    RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
+    LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
+    FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
+  )
+  # Manually install cudart and cublas since they might not be picked up as direct dependencies
+  if(CUDAToolkit_FOUND)
+    file(GLOB CUDART_LIBS
+      "${CUDAToolkit_LIBRARY_DIR}/libcudart.so*"
+      "${CUDAToolkit_LIBRARY_DIR}/libcublas.so*")
+    if(CUDART_LIBS)
+      install(FILES ${CUDART_LIBS}
+        DESTINATION ${OLLAMA_INSTALL_DIR}
+        COMPONENT MLX)
+    endif()
+  endif()
+endif()
\ No newline at end of file
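
For the macOS cross-compilation handling above, the override only fires when a single target architecture is requested. A hypothetical configure command on an arm64 host targeting x86_64 (CMAKE_OSX_ARCHITECTURES is standard CMake; the exact invocation is not part of this commit):

  cmake --preset MLX -DCMAKE_OSX_ARCHITECTURES=x86_64

With that cache variable set, CMAKE_SYSTEM_PROCESSOR is rewritten to x86_64, so MLX's architecture detection sees the target rather than the host, producing the CPU-only x86 build mentioned in the commit message.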
@@ -41,7 +41,7 @@
       "inherits": [ "CUDA" ],
       "cacheVariables": {
         "CMAKE_CUDA_ARCHITECTURES": "75-virtual;80-virtual;86-virtual;87-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;103-virtual;110-virtual;120-virtual;121-virtual",
-        "CMAKE_CUDA_FLAGS": "-t 2",
+        "CMAKE_CUDA_FLAGS": "-t 4",
         "OLLAMA_RUNNER_DIR": "cuda_v13"
       }
     },
@@ -83,6 +83,28 @@
       "cacheVariables": {
         "OLLAMA_RUNNER_DIR": "vulkan"
       }
+    },
+    {
+      "name": "MLX",
+      "inherits": [ "Default" ],
+      "cacheVariables": {
+        "MLX_ENGINE": "ON",
+        "OLLAMA_RUNNER_DIR": "mlx"
+      }
+    },
+    {
+      "name": "MLX CUDA 12",
+      "inherits": [ "MLX", "CUDA 12" ],
+      "cacheVariables": {
+        "OLLAMA_RUNNER_DIR": "mlx_cuda_v12"
+      }
+    },
+    {
+      "name": "MLX CUDA 13",
+      "inherits": [ "MLX", "CUDA 13" ],
+      "cacheVariables": {
+        "OLLAMA_RUNNER_DIR": "mlx_cuda_v13"
+      }
     }
   ],
   "buildPresets": [
@@ -140,6 +162,21 @@
       "name": "Vulkan",
       "targets": [ "ggml-vulkan" ],
       "configurePreset": "Vulkan"
+    },
+    {
+      "name": "MLX",
+      "targets": [ "mlx", "mlxc" ],
+      "configurePreset": "MLX"
+    },
+    {
+      "name": "MLX CUDA 12",
+      "targets": [ "mlx", "mlxc" ],
+      "configurePreset": "MLX CUDA 12"
+    },
+    {
+      "name": "MLX CUDA 13",
+      "targets": [ "mlx", "mlxc" ],
+      "configurePreset": "MLX CUDA 13"
     }
   ]
 }
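
The MLX CUDA presets inherit from both the MLX and CUDA configure presets, so a CUDA-enabled MLX runner builds with the same three-step flow as the other backends (these commands mirror the Dockerfile below):

  cmake --preset 'MLX CUDA 13'
  cmake --build --parallel --preset 'MLX CUDA 13'
  cmake --install build --component MLX --strip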
@@ -131,7 +131,39 @@ COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'Vulkan' \
     && cmake --build --parallel --preset 'Vulkan' \
     && cmake --install build --component Vulkan --strip --parallel 8
 
+FROM base AS mlx
+ARG CUDA13VERSION=13.0
+RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-} \
+    && dnf install -y openblas-devel lapack-devel \
+    && dnf install -y libcudnn9-cuda-13 libcudnn9-devel-cuda-13 \
+    && dnf install -y libnccl libnccl-devel
+ENV PATH=/usr/local/cuda-13/bin:$PATH
+ENV BLAS_INCLUDE_DIRS=/usr/include/openblas
+ENV LAPACK_INCLUDE_DIRS=/usr/include/openblas
+ENV CGO_LDFLAGS="-L/usr/local/cuda-13/lib64 -L/usr/local/cuda-13/targets/x86_64-linux/lib/stubs"
+ARG PARALLEL
+WORKDIR /go/src/github.com/ollama/ollama
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+COPY x/ml/backend/mlx x/ml/backend/mlx
+COPY go.mod go.sum .
+RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+RUN go mod download
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
+    && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
+    && cmake --install build --component MLX --strip --parallel ${PARALLEL}
+COPY . .
+ARG GOFLAGS="'-ldflags=-w -s'"
+ENV CGO_ENABLED=1
+ARG CGO_CFLAGS
+ARG CGO_CXXFLAGS
+# TODO wire up the actual MLX engine here instead of building the main binary...
+RUN mkdir -p dist/bin
+RUN go build -tags mlx -trimpath -buildmode=pie -o dist/bin/imagegen ./x/imagegen/cmd/engine
 
 FROM base AS build
@@ -153,6 +185,8 @@ FROM --platform=linux/amd64 scratch AS amd64
 COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
 COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
 COPY --from=vulkan dist/lib/ollama /lib/ollama/
+COPY --from=mlx /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/
+COPY --from=mlx /go/src/github.com/ollama/ollama/dist/bin/ /bin/
 
 FROM --platform=linux/arm64 scratch AS arm64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
...
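
Because the MLX build lives in its own stage, it can be exercised in isolation while iterating; a hypothetical invocation (the stage name comes from the Dockerfile above, the parallelism value is assumed):

  docker build --target mlx --build-arg PARALLEL=8 .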
@@ -6,11 +6,14 @@ import (
 	"errors"
 	"fmt"
 	"io/fs"
+	"iter"
 	"log/slog"
+	"maps"
 	"os"
 	"slices"
 	"strings"
 
+	ofs "github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/fs/ggml"
 )
@@ -18,8 +21,13 @@ type ModelParameters struct {
 	Architectures []string `json:"architectures"`
 	VocabSize     uint32   `json:"vocab_size"`
 
+	// TODO is this needed?
+	ModelType string `json:"model_type"`
+
 	TextModel struct {
 		VocabSize uint32 `json:"vocab_size"`
+		HiddenSize uint32 `json:"hidden_size"`
+		ModelType string `json:"model_type"`
 	} `json:"text_config"`
 }
@@ -33,8 +41,94 @@
 	} `json:"lora_parameters"`
 }
 
-func (ModelParameters) KV(t *Tokenizer) ggml.KV {
-	kv := ggml.KV{
+type KV map[string]any
+
+func (kv KV) Architecture() string {
+	return kv.String("general.architecture", "unknown")
+}
+
+type valueTypes interface {
+	uint8 | int8 | uint16 | int16 |
+		uint32 | int32 | uint64 | int64 |
+		string | float32 | float64 | bool
+}
+
+type arrayValueTypes interface {
+	[]uint8 | []int8 | []uint16 | []int16 |
+		[]uint32 | []int32 | []uint64 | []int64 |
+		[]string | []float32 | []float64 | []bool
+}
+
+func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ...T) (T, bool) {
+	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
+		key = kv.Architecture() + "." + key
+	}
+
+	if val, ok := kv[key].(T); ok {
+		return val, true
+	}
+
+	return defaultValue[0], false
+}
+
+func (kv KV) String(key string, defaultValue ...string) string {
+	val, _ := keyValue(kv, key, append(defaultValue, "")...)
+	return val
+}
+
+func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
+	val, _ := keyValue(kv, key, append(defaultValue, 0)...)
+	return val
+}
+
+func (kv KV) Float(key string, defaultValue ...float32) float32 {
+	val, _ := keyValue(kv, key, append(defaultValue, 0)...)
+	return val
+}
+
+func (kv KV) Bool(key string, defaultValue ...bool) bool {
+	val, _ := keyValue(kv, key, append(defaultValue, false)...)
+	return val
+}
+
+func (kv KV) Strings(key string, defaultValue ...[]string) []string {
+	val, _ := keyValue(kv, key, append(defaultValue, []string{""})...)
+	return val
+}
+
+func (kv KV) Ints(key string, defaultValue ...[]int32) []int32 {
+	val, _ := keyValue(kv, key, append(defaultValue, []int32{0})...)
+	return val
+}
+
+func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
+	val, _ := keyValue(kv, key, append(defaultValue, []uint32{0})...)
+	return val
+}
+
+func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
+	val, _ := keyValue(kv, key, append(defaultValue, []float32{0})...)
+	return val
+}
+
+func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
+	val, _ := keyValue(kv, key, append(defaultValue, []bool{false})...)
+	return val
+}
+
+func (kv KV) Len() int {
+	return len(kv)
+}
+
+func (kv KV) Keys() iter.Seq[string] {
+	return maps.Keys(kv)
+}
+
+func (kv KV) Value(key string) any {
+	return kv[key]
+}
+
+func (ModelParameters) KV(t *Tokenizer) KV {
+	kv := KV{
 		"general.file_type":            uint32(1),
 		"general.quantization_version": uint32(2),
 		"tokenizer.ggml.pre":           t.Pre,
@@ -63,7 +157,7 @@ func (ModelParameters) KV(t *Tokenizer) KV {
 	return kv
 }
 
-func (p AdapterParameters) KV() ggml.KV {
+func (p AdapterParameters) KV() KV {
 	var alpha float32
 	if p.LoraParameters.Alpha == 0 {
 		alpha = float32(p.Alpha)
@@ -71,7 +165,7 @@ func (p AdapterParameters) KV() KV {
 		alpha = p.LoraParameters.Alpha
 	}
 
-	kv := ggml.KV{
+	kv := KV{
 		"adapter.lora.alpha": alpha,
 		"adapter.type":       "lora",
 		"general.file_type":  uint32(1),
@@ -88,9 +182,14 @@ func (ModelParameters) specialTokenTypes() []string {
 	}
 }
 
-type ModelConverter interface {
+type ModelKV interface {
 	// KV maps parameters to LLM key-values
-	KV(*Tokenizer) ggml.KV
+	KV(*Tokenizer) KV
+}
+
+type ModelConverter interface {
+	ModelKV
+
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
 	Tensors([]Tensor) []*ggml.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
@@ -107,7 +206,7 @@ type moreParser interface {
 type AdapterConverter interface {
 	// KV maps parameters to LLM key-values
-	KV(ggml.KV) ggml.KV
+	KV(ofs.Config) KV
 	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
 	Tensors([]Tensor) []*ggml.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
@@ -115,7 +214,7 @@ type AdapterConverter interface {
 	Replacements() []string
 }
 
-func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
+func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ofs.Config) error {
 	bts, err := fs.ReadFile(fsys, "adapter_config.json")
 	if err != nil {
 		return err
@@ -126,8 +225,8 @@ func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ofs.Config) error {
 		return err
 	}
 
-	arch, ok := baseKV["general.architecture"]
-	if !ok {
+	arch := baseKV.Architecture()
+	if arch == "" {
 		return errors.New("architecture not set for the base model")
 	}
@@ -153,23 +252,19 @@ func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ofs.Config) error {
 	return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
 }
 
-// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
-// and files it finds in the input path.
-// Supported input model formats include safetensors.
-// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func ConvertModel(fsys fs.FS, f *os.File) error {
+func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
-		return err
+		return nil, nil, err
 	}
 
 	var p ModelParameters
 	if err := json.Unmarshal(bts, &p); err != nil {
-		return err
+		return nil, nil, err
 	}
 
 	if len(p.Architectures) < 1 {
-		return errors.New("unknown architecture")
+		return nil, nil, errors.New("unknown architecture")
 	}
 
 	var conv ModelConverter
@@ -217,22 +312,22 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 	case "DeepseekV3ForCausalLM":
 		conv = &deepseek2Model{}
 	default:
-		return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
+		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
 	}
 
 	if err := json.Unmarshal(bts, conv); err != nil {
-		return err
+		return nil, nil, err
 	}
 
 	if t, ok := conv.(moreParser); ok {
 		if err := t.parseMore(fsys); err != nil {
-			return err
+			return nil, nil, err
 		}
 	}
 
 	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
 	if err != nil {
-		return err
+		return nil, nil, err
 	}
 
 	vocabSize := int(cmp.Or(p.VocabSize, p.TextModel.VocabSize))
@@ -254,6 +349,19 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 	default:
 		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
+
+	return conv, t, nil
+}
+
+// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
+// and files it finds in the input path.
+// Supported input model formats include safetensors.
+// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
+func ConvertModel(fsys fs.FS, f *os.File) error {
+	kv, t, err := LoadModelMetadata(fsys)
+	if err != nil {
+		return err
+	}
+
+	conv := kv.(ModelConverter)
 
 	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
 	if err != nil {
@@ -263,7 +371,7 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 	return writeFile(f, conv.KV(t), conv.Tensors(ts))
 }
 
-func writeFile(f *os.File, kv ggml.KV, ts []*ggml.Tensor) error {
+func writeFile(f *os.File, kv KV, ts []*ggml.Tensor) error {
 	for i := range ts {
 		ts[i].Shape = slices.Clone(ts[i].Shape)
 		slices.Reverse(ts[i].Shape)
...
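
The package-local KV type replaces ggml.KV in the converter interfaces; its accessors resolve unqualified keys against the architecture prefix, while `general.` and `tokenizer.` keys pass through unchanged. An illustrative example (not part of the commit) of the lookup behavior:

  package convert

  import "fmt"

  // ExampleKV is illustrative only: keyValue prefixes unqualified keys with
  // the architecture, so "block_count" resolves to "llama.block_count".
  func ExampleKV() {
      kv := KV{
          "general.architecture": "llama",
          "llama.block_count":    uint32(32),
      }
      fmt.Println(kv.Architecture())      // resolved from "general.architecture"
      fmt.Println(kv.Uint("block_count")) // resolved as "llama.block_count"
      // Output:
      // llama
      // 32
  }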
@@ -88,7 +88,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
 	return nil
 }
 
-func (p *bertModel) KV(t *Tokenizer) ggml.KV {
+func (p *bertModel) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "bert"
 	kv["bert.attention.causal"] = false
...
@@ -24,7 +24,7 @@ type commandrModel struct {
 
 var _ ModelConverter = (*commandrModel)(nil)
 
-func (p *commandrModel) KV(t *Tokenizer) ggml.KV {
+func (p *commandrModel) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "command-r"
 	kv["general.name"] = "command-r"
...
@@ -47,7 +47,7 @@ type deepseek2Model struct {
 	Architecture string
 }
 
-func (p *deepseek2Model) KV(t *Tokenizer) ggml.KV {
+func (p *deepseek2Model) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "deepseek2"
 	kv["general.type"] = "model"
...
@@ -41,7 +41,7 @@ type deepseekocr struct {
 	} `json:"vision_config"`
 }
 
-func (m *deepseekocr) KV(t *Tokenizer) ggml.KV {
+func (m *deepseekocr) KV(t *Tokenizer) KV {
 	kv := m.ModelParameters.KV(t)
 	kv["general.architecture"] = "deepseekocr"
 	kv["block_count"] = m.LanguageConfig.HiddenLayers
...
@@ -23,7 +23,7 @@ type gemmaModel struct {
 
 var _ ModelConverter = (*gemmaModel)(nil)
 
-func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
+func (p *gemmaModel) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma"
 	kv["gemma.context_length"] = p.MaxPositionEmbeddings
...
 package convert
 
-import "github.com/ollama/ollama/fs/ggml"
-
 type gemma2Model struct {
 	gemmaModel
 	SlidingWindow uint32 `json:"sliding_window"`
@@ -9,7 +7,7 @@ type gemma2Model struct {
 	FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
 }
 
-func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
+func (p *gemma2Model) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma2"
 	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
...
@@ -6,6 +6,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/fs/ggml"
 )
 
@@ -15,7 +16,7 @@ type gemma2Adapter struct {
 
 var _ AdapterConverter = (*gemma2Adapter)(nil)
 
-func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
+func (p *gemma2Adapter) KV(baseKV fs.Config) KV {
 	kv := p.AdapterParameters.KV()
 	kv["general.architecture"] = "gemma2"
 	return kv
...
@@ -3,8 +3,6 @@ package convert
 import (
 	"cmp"
 	"slices"
-
-	"github.com/ollama/ollama/fs/ggml"
 )
 
 type gemma3Model struct {
@@ -55,7 +53,7 @@ const (
 	gemma27BLayerCount = 62
 )
 
-func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
+func (p *gemma3Model) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma3"
...
@@ -38,7 +38,7 @@ type gemma3nModel struct {
 	VisionModel struct{} `json:"vision_config"`
 }
 
-func (m *gemma3nModel) KV(t *Tokenizer) ggml.KV {
+func (m *gemma3nModel) KV(t *Tokenizer) KV {
 	kv := m.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma3n"
 	kv["gemma3n.activation_sparsity_scale"] = slices.Collect(func(yield func(float32) bool) {
...
@@ -37,7 +37,7 @@ type gptossModel struct {
 
 var _ ModelConverter = (*gptossModel)(nil)
 
-func (m *gptossModel) KV(t *Tokenizer) ggml.KV {
+func (m *gptossModel) KV(t *Tokenizer) KV {
 	kv := m.ModelParameters.KV(t)
 	kv["general.architecture"] = "gptoss"
 	kv["general.file_type"] = uint32(4)
...
@@ -48,7 +48,7 @@ type llamaModel struct {
 
 var _ ModelConverter = (*llamaModel)(nil)
 
-func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
+func (p *llamaModel) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
...
@@ -35,7 +35,7 @@ type llama4Model struct {
 }
 
 // KV implements ModelConverter.
-func (p *llama4Model) KV(t *Tokenizer) ggml.KV {
+func (p *llama4Model) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "llama4"
...
@@ -7,6 +7,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"
 
+	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/fs/ggml"
 )
 
@@ -18,13 +19,13 @@ type llamaAdapter struct {
 
 var _ AdapterConverter = (*llamaAdapter)(nil)
 
-func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
+func (p *llamaAdapter) KV(baseKV fs.Config) KV {
 	kv := p.AdapterParameters.KV()
 	kv["general.architecture"] = "llama"
-	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
-	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
+	kv["llama.attention.head_count"] = baseKV.Value("llama.attention.head_count")
+	kv["llama.attention.head_count_kv"] = baseKV.Value("llama.attention.head_count_kv")
 
-	p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
+	p.NumAttentionHeads = baseKV.Value("llama.attention.head_count").(uint32)
 
 	return kv
 }
...
@@ -60,7 +60,7 @@ type mistral3Model struct {
 	ProjectorHiddenAct string `json:"projector_hidden_act"`
 }
 
-func (p *mistral3Model) KV(t *Tokenizer) ggml.KV {
+func (p *mistral3Model) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "mistral3"
 	kv["mistral3.vocab_size"] = p.TextModel.VocabSize
...
@@ -39,7 +39,7 @@ type mistral3CausalModel struct {
 	} `json:"rope_parameters"`
 }
 
-func (p *mistral3CausalModel) KV(t *Tokenizer) ggml.KV {
+func (p *mistral3CausalModel) KV(t *Tokenizer) KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "mistral3"
 	kv["mistral3.vocab_size"] = p.VocabSize
...
@@ -12,7 +12,7 @@ type mixtralModel struct {
 	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
 }
 
-func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
+func (p *mixtralModel) KV(t *Tokenizer) KV {
 	kv := p.llamaModel.KV(t)
 
 	if p.NumLocalExperts > 0 {
...
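
Each of the converter changes above follows the same mechanical pattern: the KV method signature swaps ggml.KV for the package-local KV. A hypothetical converter (type and fields invented for illustration, not a model supported by this commit) showing the shape these files converge on:

  package convert

  // exampleModel illustrates the updated converter pattern.
  type exampleModel struct {
      ModelParameters
      MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
  }

  func (p *exampleModel) KV(t *Tokenizer) KV {
      kv := p.ModelParameters.KV(t) // common general.* and tokenizer.* keys
      kv["general.architecture"] = "example"
      kv["example.context_length"] = p.MaxPositionEmbeddings
      return kv
  }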