Commit 6c5ccb11 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Revamp ROCm support

This refines where we extract the LLM libraries to by adding a new
OLLAMA_HOME env var that defaults to `~/.ollama`. The logic was already
idempotent, so this should speed up startups after the first time a
new release is deployed.  It also cleans up after itself.

We now build only a single ROCm version (latest major) on both windows
and linux.  Given the large size of ROCm's tensor files, we split the
dependency out.  It's bundled into the installer on windows, and a
separate download on linux.  The linux install script is now smart and
detects the presence of AMD GPUs and looks to see if rocm v6 is already
present, and if not, then downloads our dependency tar file.

For Linux discovery, we now use sysfs and check each GPU against what
ROCm supports so we can degrade to CPU gracefully instead of having
llama.cpp+rocm assert/crash on us.  For Windows, we now use go's windows
dynamic library loading logic to access the amdhip64.dll APIs to query
the GPU information.
parent 2e20110e
...@@ -179,17 +179,21 @@ fi ...@@ -179,17 +179,21 @@ fi
if [ -d "${ROCM_PATH}" ]; then if [ -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library" echo "ROCm libraries detected - building dynamic ROCm library"
if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true) ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
fi fi
init_vars init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build build
# Note: the ROCM libs and runtime library files are too large to embed, so we depend on # Record the ROCM dependencies
# them being present at runtime on the host rm -f "${BUILD_DIR}/lib/deps.txt"
touch "${BUILD_DIR}/lib/deps.txt"
for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
done
compress_libs compress_libs
fi fi
......
...@@ -2,19 +2,52 @@ ...@@ -2,19 +2,52 @@
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
function amdGPUs {
if ($env:AMDGPU_TARGETS) {
return $env:AMDGPU_TARGETS
}
# TODO - load from some common data file for linux + windows build consistency
$GPU_LIST = @(
"gfx900"
"gfx906:xnack-"
"gfx908:xnack-"
"gfx90a:xnack+"
"gfx90a:xnack-"
"gfx1010"
"gfx1012"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
)
$GPU_LIST -join ';'
}
function init_vars { function init_vars {
# Verify the environment is a Developer Shell for MSVC 2019
write-host $env:VSINSTALLDIR
if (($env:VSINSTALLDIR -eq $null)) {
Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment"
exit 1
}
$script:SRC_DIR = $(resolve-path "..\..\") $script:SRC_DIR = $(resolve-path "..\..\")
$script:llamacppDir = "../llama.cpp" $script:llamacppDir = "../llama.cpp"
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A", "x64") $script:cmakeDefs = @(
"-DBUILD_SHARED_LIBS=on",
"-DLLAMA_NATIVE=off"
)
$script:cmakeTargets = @("ext_server") $script:cmakeTargets = @("ext_server")
$script:ARCH = "amd64" # arm not yet supported. $script:ARCH = "amd64" # arm not yet supported.
if ($env:CGO_CFLAGS -contains "-g") { if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on") $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo" $script:config = "RelWithDebInfo"
} else { } else {
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off") $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
$script:config = "Release" $script:config = "Release"
} }
if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
$script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
}
# Try to find the CUDA dir # Try to find the CUDA dir
if ($env:CUDA_LIB_DIR -eq $null) { if ($env:CUDA_LIB_DIR -eq $null) {
$d=(get-command -ea 'silentlycontinue' nvcc).path $d=(get-command -ea 'silentlycontinue' nvcc).path
...@@ -157,7 +190,7 @@ apply_patches ...@@ -157,7 +190,7 @@ apply_patches
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on") $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
init_vars init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
write-host "Building LCD CPU" write-host "Building LCD CPU"
build build
...@@ -166,7 +199,7 @@ sign ...@@ -166,7 +199,7 @@ sign
compress_libs compress_libs
init_vars init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
write-host "Building AVX CPU" write-host "Building AVX CPU"
build build
...@@ -175,7 +208,7 @@ sign ...@@ -175,7 +208,7 @@ sign
compress_libs compress_libs
init_vars init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
write-host "Building AVX2 CPU" write-host "Building AVX2 CPU"
build build
...@@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) { ...@@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) {
} }
init_vars init_vars
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}") $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
write-host "Building CUDA"
build build
install install
sign sign
compress_libs compress_libs
} }
# TODO - actually implement ROCm support on windows
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
rm -ea 0 -recurse -force -path "${script:buildDir}/lib" if ($null -ne $env:HIP_PATH) {
md "${script:buildDir}/lib" -ea 0 > $null $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
echo $null >> "${script:buildDir}/lib/.generated" if ($null -ne $script:ROCM_VERSION) {
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
}
init_vars
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
$script:cmakeDefs += @(
"-G", "Ninja",
"-DCMAKE_C_COMPILER=clang.exe",
"-DCMAKE_CXX_COMPILER=clang++.exe",
"-DLLAMA_HIPBLAS=on",
"-DLLAMA_AVX=on",
"-DLLAMA_AVX2=off",
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
"-DAMDGPU_TARGETS=$(amdGPUs)",
"-DGPU_TARGETS=$(amdGPUs)"
)
# Make sure the ROCm binary dir is first in the path
$env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH"
# We have to clobber the LIB var from the developer shell for clang to work properly
$env:LIB=""
write-host "Building ROCm"
build
# Ninja doesn't prefix with config name
${script:config}=""
install
if ($null -ne $script:DUMPBIN) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
}
sign
compress_libs
}
cleanup cleanup
write-host "`ngo generate completed" write-host "`ngo generate completed"
...@@ -19,7 +19,7 @@ type LLM interface { ...@@ -19,7 +19,7 @@ type LLM interface {
Close() Close()
} }
func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) {
if _, err := os.Stat(model); err != nil { if _, err := os.Stat(model); err != nil {
return nil, err return nil, err
} }
...@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options) ...@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts.RopeFrequencyBase = 0.0 opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0 opts.RopeFrequencyScale = 0.0
return newLlmServer(info, workDir, model, adapters, projectors, opts) return newLlmServer(info, model, adapters, projectors, opts)
} }
// Give any native cgo implementations an opportunity to initialize // Give any native cgo implementations an opportunity to initialize
func Init(workdir string) error { func Init() error {
return nativeInit(workdir) return nativeInit()
} }
func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
dynLibs := getDynLibs(gpuInfo) dynLibs := getDynLibs(gpuInfo)
// Check to see if the user has requested a specific library instead of auto-detecting // Check to see if the user has requested a specific library instead of auto-detecting
...@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto ...@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto
_, err := os.Stat(dynLibs[0]) _, err := os.Stat(dynLibs[0])
if err != nil { if err != nil {
slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0])) slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0]))
err = nativeInit(workDir) err = nativeInit()
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
...@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool { ...@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool {
return false return false
} }
func nativeInit(workdir string) error { func nativeInit() error {
slog.Info("Extracting dynamic libraries...") slog.Info("Extracting dynamic libraries...")
assetsDir, err := gpu.AssetsDir()
if err != nil {
return err
}
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
if err != nil { if err != nil {
if err == payloadMissing { if err == payloadMissing {
// TODO perhaps consider this a hard failure on arm macs? // TODO perhaps consider this a hard failure on arm macs?
...@@ -115,10 +119,10 @@ func nativeInit(workdir string) error { ...@@ -115,10 +119,10 @@ func nativeInit(workdir string) error {
} }
return err return err
} }
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
} }
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*") libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
if err != nil { if err != nil {
if err == payloadMissing { if err == payloadMissing {
slog.Info(fmt.Sprintf("%s", payloadMissing)) slog.Info(fmt.Sprintf("%s", payloadMissing))
...@@ -149,17 +153,13 @@ func nativeInit(workdir string) error { ...@@ -149,17 +153,13 @@ func nativeInit(workdir string) error {
return nil return nil
} }
func extractDynamicLibs(workDir, glob string) ([]string, error) { func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
files, err := fs.Glob(libEmbed, glob) files, err := fs.Glob(libEmbed, glob)
if err != nil || len(files) == 0 { if err != nil || len(files) == 0 {
return nil, payloadMissing return nil, payloadMissing
} }
libs := []string{} libs := []string{}
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
// and tracking by version so we don't reexpand the files every time
// Also maybe consider lazy loading only what is needed
g := new(errgroup.Group) g := new(errgroup.Group)
for _, file := range files { for _, file := range files {
pathComps := strings.Split(file, "/") pathComps := strings.Split(file, "/")
...@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { ...@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
g.Go(func() error { g.Go(func() error {
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs // Include the variant in the path to avoid conflicts between multiple server libs
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
srcFile, err := libEmbed.Open(file) srcFile, err := libEmbed.Open(file)
if err != nil { if err != nil {
return fmt.Errorf("read payload %s: %v", file, err) return fmt.Errorf("read payload %s: %v", file, err)
} }
defer srcFile.Close() defer srcFile.Close()
if err := os.MkdirAll(targetDir, 0o755); err != nil { if err := os.MkdirAll(targetDir, 0o755); err != nil {
return fmt.Errorf("create payload temp dir %s: %v", workDir, err) return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
} }
src := io.Reader(srcFile) src := io.Reader(srcFile)
filename := file filename := file
...@@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { ...@@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
libs = append(libs, destFile) libs = append(libs, destFile)
} }
_, err = os.Stat(destFile) destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
switch { if err != nil {
case errors.Is(err, os.ErrNotExist): return fmt.Errorf("write payload %s: %v", file, err)
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) }
if err != nil { defer destFp.Close()
return fmt.Errorf("write payload %s: %v", file, err) if _, err := io.Copy(destFp, src); err != nil {
} return fmt.Errorf("copy payload %s: %v", file, err)
defer destFile.Close()
if _, err := io.Copy(destFile, src); err != nil {
return fmt.Errorf("copy payload %s: %v", file, err)
}
case err != nil:
return fmt.Errorf("stat payload %s: %v", file, err)
} }
return nil return nil
}) })
...@@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { ...@@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
return libs, g.Wait() return libs, g.Wait()
} }
func extractPayloadFiles(workDir, glob string) error { func extractPayloadFiles(assetsDir, glob string) error {
files, err := fs.Glob(libEmbed, glob) files, err := fs.Glob(libEmbed, glob)
if err != nil || len(files) == 0 { if err != nil || len(files) == 0 {
return payloadMissing return payloadMissing
...@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error { ...@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error {
return fmt.Errorf("read payload %s: %v", file, err) return fmt.Errorf("read payload %s: %v", file, err)
} }
defer srcFile.Close() defer srcFile.Close()
if err := os.MkdirAll(workDir, 0o755); err != nil { if err := os.MkdirAll(assetsDir, 0o755); err != nil {
return fmt.Errorf("create payload temp dir %s: %v", workDir, err) return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
} }
src := io.Reader(srcFile) src := io.Reader(srcFile)
filename := file filename := file
...@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error { ...@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error {
filename = strings.TrimSuffix(filename, ".gz") filename = strings.TrimSuffix(filename, ".gz")
} }
destFile := filepath.Join(workDir, filepath.Base(filename)) destFile := filepath.Join(assetsDir, filepath.Base(filename))
_, err = os.Stat(destFile) _, err = os.Stat(destFile)
switch { switch {
case errors.Is(err, os.ErrNotExist): case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil { if err != nil {
return fmt.Errorf("write payload %s: %v", file, err) return fmt.Errorf("write payload %s: %v", file, err)
} }
defer destFile.Close() defer destFp.Close()
if _, err := io.Copy(destFile, src); err != nil { if _, err := io.Copy(destFp, src); err != nil {
return fmt.Errorf("copy payload %s: %v", file, err) return fmt.Errorf("copy payload %s: %v", file, err)
} }
case err != nil: case err != nil:
return fmt.Errorf("stat payload %s: %v", file, err) return fmt.Errorf("stat payload %s: %v", file, err)
case err == nil:
slog.Debug("payload already exists: " + destFile)
} }
} }
return nil return nil
......
...@@ -4,5 +4,5 @@ import ( ...@@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed llama.cpp/build/linux/*/*/lib/*.so* //go:embed llama.cpp/build/linux/*/*/lib/*
var libEmbed embed.FS var libEmbed embed.FS
...@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do ...@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do
. .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/
docker rm builder-$TARGETARCH docker rm builder-$TARGETARCH
done done
...@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute ...@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error { func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
workDir := c.GetString("workDir")
needLoad := loaded.runner == nil || // is there a model loaded? needLoad := loaded.runner == nil || // is there a model loaded?
loaded.ModelPath != model.ModelPath || // has the base model changed? loaded.ModelPath != model.ModelPath || // has the base model changed?
!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed? !reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
...@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D ...@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
loaded.Options = nil loaded.Options = nil
} }
llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts) llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
if err != nil { if err != nil {
// some older models are not compatible with newer versions of llama.cpp // some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to // show a generalized compatibility error until there is a better way to
...@@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error { ...@@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error {
os.Exit(0) os.Exit(0)
}() }()
if err := llm.Init(s.WorkDir); err != nil { if err := llm.Init(); err != nil {
return fmt.Errorf("unable to initialize llm library %w", err) return fmt.Errorf("unable to initialize llm library %w", err)
} }
if runtime.GOOS == "linux" { // TODO - windows too if runtime.GOOS == "linux" { // TODO - windows too
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment