Unverified Commit 3c8df380 authored by Daniel Hiltgen, committed by GitHub

Merge pull request #2885 from dhiltgen/rocm_v6_only

Revamp ROCm support
parents 7d564835 6c5ccb11
@@ -179,17 +179,21 @@ fi
 if [ -d "${ROCM_PATH}" ]; then
     echo "ROCm libraries detected - building dynamic ROCm library"
-    if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
-        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
+    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
+        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
     fi
     init_vars
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
     BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
-    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
+    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
     build
-    # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
-    # them being present at runtime on the host
+    # Record the ROCM dependencies
+    rm -f "${BUILD_DIR}/lib/deps.txt"
+    touch "${BUILD_DIR}/lib/deps.txt"
+    for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
+        echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
+    done
     compress_libs
 fi
...
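The deps.txt step pairs with the new $ORIGIN/../rocm/ rpath: instead of embedding the large ROCm runtime, the build walks ldd output for libext_server.so and records the ROCm, AMDGPU, and libtinfo library paths so packaging can bundle exactly those files. A minimal Go sketch of the same filtering, for illustration only (the build does this in shell; the function name and argument path here are hypothetical):

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// rocmDeps mirrors the shell pipeline above: run ldd, take the resolved
// paths, and keep only the ROCm/AMDGPU/libtinfo entries worth bundling.
func rocmDeps(lib string) ([]string, error) {
	out, err := exec.Command("ldd", lib).Output()
	if err != nil {
		return nil, err
	}
	var deps []string
	for _, line := range strings.Split(string(out), "\n") {
		// ldd lines look like: "librocblas.so.4 => /opt/rocm/lib/librocblas.so.4 (0x...)"
		parts := strings.Split(line, "=>")
		if len(parts) < 2 {
			continue
		}
		fields := strings.Fields(parts[1])
		if len(fields) == 0 {
			continue
		}
		p := fields[0]
		if strings.Contains(p, "rocm") || strings.Contains(p, "amdgpu") || strings.Contains(p, "libtinfo") {
			deps = append(deps, p)
		}
	}
	return deps, nil
}

func main() {
	deps, err := rocmDeps("libext_server.so") // hypothetical local path
	if err != nil {
		panic(err)
	}
	for _, d := range deps {
		fmt.Println(d)
	}
}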
@@ -2,19 +2,52 @@
 $ErrorActionPreference = "Stop"
 
+function amdGPUs {
+    if ($env:AMDGPU_TARGETS) {
+        return $env:AMDGPU_TARGETS
+    }
+    # TODO - load from some common data file for linux + windows build consistency
+    $GPU_LIST = @(
+        "gfx900"
+        "gfx906:xnack-"
+        "gfx908:xnack-"
+        "gfx90a:xnack+"
+        "gfx90a:xnack-"
+        "gfx1010"
+        "gfx1012"
+        "gfx1030"
+        "gfx1100"
+        "gfx1101"
+        "gfx1102"
+    )
+    $GPU_LIST -join ';'
+}
+
 function init_vars {
+    # Verify the environment is a Developer Shell for MSVC 2019
+    write-host $env:VSINSTALLDIR
+    if (($env:VSINSTALLDIR -eq $null)) {
+        Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment"
+        exit 1
+    }
     $script:SRC_DIR = $(resolve-path "..\..\")
     $script:llamacppDir = "../llama.cpp"
-    $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A", "x64")
+    $script:cmakeDefs = @(
+        "-DBUILD_SHARED_LIBS=on",
+        "-DLLAMA_NATIVE=off"
+    )
     $script:cmakeTargets = @("ext_server")
     $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
-        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
+        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
         $script:config = "RelWithDebInfo"
     } else {
-        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
+        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
         $script:config = "Release"
     }
+    if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
+        $script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
+    }
     # Try to find the CUDA dir
     if ($env:CUDA_LIB_DIR -eq $null) {
         $d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -157,7 +190,7 @@ apply_patches
 $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
 write-host "Building LCD CPU"
 build
@@ -166,7 +199,7 @@ sign
 compress_libs
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
 write-host "Building AVX CPU"
 build
@@ -175,7 +208,7 @@ sign
 compress_libs
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
 write-host "Building AVX2 CPU"
 build
@@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) {
     }
     init_vars
     $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-    $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    write-host "Building CUDA"
     build
     install
     sign
     compress_libs
 }
-# TODO - actually implement ROCm support on windows
-$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
-rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
-md "${script:buildDir}/lib" -ea 0 > $null
-echo $null >> "${script:buildDir}/lib/.generated"
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+    }
+    init_vars
+    $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+    $script:cmakeDefs += @(
+        "-G", "Ninja",
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+    )
+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH"
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+    write-host "Building ROCm"
+    build
+    # Ninja doesn't prefix with config name
+    ${script:config}=""
+    install
+    if ($null -ne $script:DUMPBIN) {
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
+    }
+    sign
+    compress_libs
+}
 cleanup
 write-host "`ngo generate completed"
@@ -19,7 +19,7 @@ type LLM interface {
 	Close()
 }
 
-func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
 
-	return newLlmServer(info, workDir, model, adapters, projectors, opts)
+	return newLlmServer(info, model, adapters, projectors, opts)
 }
 
 // Give any native cgo implementations an opportunity to initialize
-func Init(workdir string) error {
-	return nativeInit(workdir)
+func Init() error {
+	return nativeInit()
 }
 
-func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	dynLibs := getDynLibs(gpuInfo)
 
 	// Check to see if the user has requested a specific library instead of auto-detecting
@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto
 	_, err := os.Stat(dynLibs[0])
 	if err != nil {
 		slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0]))
-		err = nativeInit(workDir)
+		err = nativeInit()
 		if err != nil {
 			return nil, err
 		}
...
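With workDir gone from the public API, callers initialize once and then load models, and the library resolves its own assets directory internally (via gpu.AssetsDir(), shown below). A minimal sketch of an updated call site, assuming only the signatures above (the package name and nil adapter/projector lists are placeholders):

package example

import (
	"fmt"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
)

// loadModel mirrors the updated call sites: no work directory is threaded
// through; payload extraction happens inside llm.Init and llm.New.
func loadModel(modelPath string, opts api.Options) (llm.LLM, error) {
	if err := llm.Init(); err != nil {
		return nil, fmt.Errorf("unable to initialize llm library %w", err)
	}
	// nil adapter and projector lists; real callers pass model metadata here.
	return llm.New(modelPath, nil, nil, opts)
}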
@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool {
 	return false
 }
 
-func nativeInit(workdir string) error {
+func nativeInit() error {
 	slog.Info("Extracting dynamic libraries...")
+	assetsDir, err := gpu.AssetsDir()
+	if err != nil {
+		return err
+	}
 	if runtime.GOOS == "darwin" {
-		err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
+		err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
 		if err != nil {
 			if err == payloadMissing {
 				// TODO perhaps consider this a hard failure on arm macs?
@@ -115,10 +119,10 @@ func nativeInit(workdir string) error {
 			}
 			return err
 		}
-		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
+		os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
 	}
 
-	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
+	libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
 	if err != nil {
 		if err == payloadMissing {
 			slog.Info(fmt.Sprintf("%s", payloadMissing))
@@ -149,17 +153,13 @@ func nativeInit(workdir string) error {
 	return nil
 }
 
-func extractDynamicLibs(workDir, glob string) ([]string, error) {
+func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
 	files, err := fs.Glob(libEmbed, glob)
 	if err != nil || len(files) == 0 {
 		return nil, payloadMissing
 	}
 	libs := []string{}
 
-	// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
-	// and tracking by version so we don't reexpand the files every time
-	// Also maybe consider lazy loading only what is needed
 	g := new(errgroup.Group)
 	for _, file := range files {
 		pathComps := strings.Split(file, "/")
@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 		g.Go(func() error {
 			// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
 			// Include the variant in the path to avoid conflicts between multiple server libs
-			targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
+			targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
 			srcFile, err := libEmbed.Open(file)
 			if err != nil {
 				return fmt.Errorf("read payload %s: %v", file, err)
 			}
 			defer srcFile.Close()
 			if err := os.MkdirAll(targetDir, 0o755); err != nil {
-				return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+				return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
 			}
 			src := io.Reader(srcFile)
 			filename := file
@@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 				libs = append(libs, destFile)
 			}
 
-			_, err = os.Stat(destFile)
-			switch {
-			case errors.Is(err, os.ErrNotExist):
-				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-				if err != nil {
-					return fmt.Errorf("write payload %s: %v", file, err)
-				}
-				defer destFile.Close()
-				if _, err := io.Copy(destFile, src); err != nil {
-					return fmt.Errorf("copy payload %s: %v", file, err)
-				}
-			case err != nil:
-				return fmt.Errorf("stat payload %s: %v", file, err)
+			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+			if err != nil {
+				return fmt.Errorf("write payload %s: %v", file, err)
+			}
+			defer destFp.Close()
+			if _, err := io.Copy(destFp, src); err != nil {
+				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 			return nil
 		})
@@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 	return libs, g.Wait()
 }
 
-func extractPayloadFiles(workDir, glob string) error {
+func extractPayloadFiles(assetsDir, glob string) error {
 	files, err := fs.Glob(libEmbed, glob)
 	if err != nil || len(files) == 0 {
 		return payloadMissing
@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error {
 			return fmt.Errorf("read payload %s: %v", file, err)
 		}
 		defer srcFile.Close()
-		if err := os.MkdirAll(workDir, 0o755); err != nil {
-			return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+		if err := os.MkdirAll(assetsDir, 0o755); err != nil {
+			return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
 		}
 		src := io.Reader(srcFile)
 		filename := file
@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error {
 			filename = strings.TrimSuffix(filename, ".gz")
 		}
 
-		destFile := filepath.Join(workDir, filepath.Base(filename))
+		destFile := filepath.Join(assetsDir, filepath.Base(filename))
 		_, err = os.Stat(destFile)
 		switch {
 		case errors.Is(err, os.ErrNotExist):
-			destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
 			if err != nil {
 				return fmt.Errorf("write payload %s: %v", file, err)
 			}
-			defer destFile.Close()
-			if _, err := io.Copy(destFile, src); err != nil {
+			defer destFp.Close()
+			if _, err := io.Copy(destFp, src); err != nil {
 				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 		case err != nil:
 			return fmt.Errorf("stat payload %s: %v", file, err)
+		case err == nil:
+			slog.Debug("payload already exists: " + destFile)
 		}
 	}
 	return nil
...
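Note the asymmetry these two hunks introduce: extractDynamicLibs now unconditionally truncates and rewrites each library, while extractPayloadFiles still skips files that already exist (and now logs that case). A generic sketch of the two behaviors side by side, using a hypothetical helper name:

package payload

import (
	"errors"
	"fmt"
	"io"
	"log/slog"
	"os"
)

// extractOne writes src to destFile. When overwrite is false and destFile
// already exists, extraction is skipped -- the behavior extractPayloadFiles
// keeps; extractDynamicLibs now effectively always passes overwrite=true.
func extractOne(src io.Reader, destFile string, overwrite bool) error {
	if !overwrite {
		_, err := os.Stat(destFile)
		switch {
		case err == nil:
			slog.Debug("payload already exists: " + destFile)
			return nil
		case !errors.Is(err, os.ErrNotExist):
			return fmt.Errorf("stat payload %s: %v", destFile, err)
		}
	}
	destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
	if err != nil {
		return fmt.Errorf("write payload %s: %v", destFile, err)
	}
	defer destFp.Close()
	if _, err := io.Copy(destFp, src); err != nil {
		return fmt.Errorf("copy payload %s: %v", destFile, err)
	}
	return nil
}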
@@ -4,5 +4,5 @@ import (
 	"embed"
 )
 
-//go:embed llama.cpp/build/linux/*/*/lib/*.so*
+//go:embed llama.cpp/build/linux/*/*/lib/*
 var libEmbed embed.FS
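The widened pattern matters because the lib directories now carry more than shared objects: the deps.txt manifests written by gen_linux.sh are embedded alongside the compressed libraries, and nativeInit picks everything up with the matching fs.Glob call shown earlier. A standalone sketch of that enumeration, assuming an fs.FS such as the libEmbed above:

package payload

import "io/fs"

// listPayloads enumerates everything under the embedded lib directories.
// With the widened //go:embed pattern, this now returns the deps.txt
// manifests as well as the compressed .so files.
func listPayloads(assets fs.FS) ([]string, error) {
	return fs.Glob(assets, "llama.cpp/build/*/*/*/lib/*")
}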
@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do
         .
     docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
     docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
+    docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/
     docker rm builder-$TARGETARCH
 done
@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute
 // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
 func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
-	workDir := c.GetString("workDir")
-
 	needLoad := loaded.runner == nil || // is there a model loaded?
 		loaded.ModelPath != model.ModelPath || // has the base model changed?
 		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
 		loaded.Options = nil
 	}
 
-	llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
+	llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
 	if err != nil {
 		// some older models are not compatible with newer versions of llama.cpp
 		// show a generalized compatibility error until there is a better way to
@@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error {
 		os.Exit(0)
 	}()
 
-	if err := llm.Init(s.WorkDir); err != nil {
+	if err := llm.Init(); err != nil {
 		return fmt.Errorf("unable to initialize llm library %w", err)
 	}
 
 	if runtime.GOOS == "linux" { // TODO - windows too
...