Unverified Commit cd5c8f64 authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

Optimize container images for startup (#6547)

* Optimize container images for startup

This change adjusts how runner payloads are handled so that container
builds can keep them extracted in the filesystem.
This makes it easier to optimize the cpu/cuda vs. cpu/rocm images for
size, and should result in faster startup times for container images.

* Refactor payload logic and add buildx support for faster builds

* Move payloads around

* Review comments

* Converge to buildx based helper scripts

* Use docker buildx action for release
parent fef257c5
package llm
import (
"embed"
"syscall"
)
//go:embed build/darwin/x86_64/*/bin/*
var libEmbed embed.FS
var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
package llm
import (
"embed"
"syscall"
)
//go:embed build/linux/*/*/bin/*
var libEmbed embed.FS
var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
package llm
import (
"embed"
"syscall"
)
// unused on windows
var libEmbed embed.FS
const CREATE_DEFAULT_ERROR_MODE = 0x04000000
var LlamaServerSysProcAttr = &syscall.SysProcAttr{
......
......@@ -24,9 +24,11 @@ import (
"golang.org/x/sync/semaphore"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/build"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/runners"
)
type LlamaServer interface {
......@@ -106,7 +108,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
gpus = gpu.GetCPUInfo()
}
if len(gpus) == 1 && gpus[0].Library == "cpu" {
cpuRunner = serverForCpu()
cpuRunner = runners.ServerForCpu()
estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
} else {
estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
......@@ -118,7 +120,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
opts.NumGPU = 0
case gpus[0].Library != "metal" && estimate.Layers == 0:
// Don't bother loading into the GPU if no layers can fit
cpuRunner = serverForCpu()
cpuRunner = runners.ServerForCpu()
gpus = gpu.GetCPUInfo()
case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu":
opts.NumGPU = estimate.Layers
......@@ -145,25 +147,20 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
}
availableServers := getAvailableServers()
rDir, err := runners.Refresh(build.EmbedFS)
if err != nil {
return nil, err
}
availableServers := runners.GetAvailableServers(rDir)
if len(availableServers) == 0 {
if runtime.GOOS != "windows" {
slog.Warn("llama server binary disappeared, reinitializing payloads")
err = Init()
if err != nil {
slog.Warn("failed to reinitialize payloads", "error", err)
return nil, err
}
availableServers = getAvailableServers()
} else {
return nil, finalErr
}
return nil, finalErr
}
var servers []string
if cpuRunner != "" {
servers = []string{cpuRunner}
} else {
servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
}
demandLib := envconfig.LLMLibrary()
if demandLib != "" {
......@@ -330,7 +327,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
_, err := os.Stat(server)
if errors.Is(err, os.ErrNotExist) {
slog.Warn("llama server disappeared, reinitializing payloads", "path", server, "error", err)
err = Init()
_, err = runners.Refresh(build.EmbedFS)
if err != nil {
slog.Warn("failed to reinitialize payloads", "error", err)
return nil, err
......
package llm
package runners
import (
"compress/gzip"
......@@ -11,49 +11,272 @@ import (
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"sync"
"syscall"
"golang.org/x/sync/errgroup"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/gpu"
)
var errPayloadMissing = errors.New("expected payloads not included in this build of ollama")
const (
binGlob = "*/*/*/*"
)
func Init() error {
payloadsDir, err := gpu.PayloadsDir()
if err != nil {
return err
var (
lock sync.Mutex
runnersDir = ""
)
// Return the location where runners are stored
// If runners are payloads, this will either extract them
// or refresh them if any have disappeared due to tmp cleaners
func Refresh(payloadFS fs.FS) (string, error) {
lock.Lock()
defer lock.Unlock()
var err error
// Wire up extra logging on our first load
if runnersDir == "" {
defer func() {
var runners []string
for v := range GetAvailableServers(runnersDir) {
runners = append(runners, v)
}
slog.Info("Dynamic LLM libraries", "runners", runners)
slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
}()
}
if runtime.GOOS != "windows" {
slog.Info("extracting embedded files", "dir", payloadsDir)
binGlob := "build/*/*/*/bin/*"
if hasPayloads(payloadFS) {
if runnersDir == "" {
runnersDir, err = extractRunners(payloadFS)
} else {
err = refreshRunners(payloadFS, runnersDir)
}
} else if runnersDir == "" {
runnersDir, err = locateRunners()
}
// extract server libraries
err = extractFiles(payloadsDir, binGlob)
return runnersDir, err
}
// Cleanup removes the temporary directory that holds extracted runners.
//
// It does nothing unless payloadFS carries payloads and a runners directory
// has been recorded, so runners that were only located on disk are never
// deleted. A removal failure is logged rather than returned.
func Cleanup(payloadFS fs.FS) {
	lock.Lock()
	defer lock.Unlock()

	if !hasPayloads(payloadFS) || runnersDir == "" {
		return
	}

	// We want to fully clean up the tmpdir parent of the payloads dir
	tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
	slog.Debug("cleaning up", "dir", tmpDir)
	if err := os.RemoveAll(tmpDir); err != nil {
		slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
	}
}
func locateRunners() (string, error) {
exe, err := os.Executable()
if err != nil {
return "", err
}
cwd, err := os.Getwd()
if err != nil {
return "", err
}
var variants []string
for v := range getAvailableServers() {
variants = append(variants, v)
var paths []string
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
paths = append(paths,
root,
filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for _, path := range paths {
candidate := filepath.Join(path, "lib", "ollama", "runners")
if _, err := os.Stat(candidate); err == nil {
return candidate, nil
}
}
return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
}
// hasPayloads reports whether payloadFS is carrying nested payloads for the
// runners. A glob error, no binGlob matches at all, or a single match whose
// path contains "placeholder" all count as carrying none.
func hasPayloads(payloadFS fs.FS) bool {
	matches, err := fs.Glob(payloadFS, binGlob)
	switch {
	case err != nil:
		return false
	case len(matches) == 0:
		return false
	case len(matches) == 1 && strings.Contains(matches[0], "placeholder"):
		return false
	default:
		return true
	}
}
// extractRunners creates a fresh temp directory, records this process's pid
// in it, and extracts the payloads from payloadFS into a "runners"
// subdirectory. It returns the runners directory path together with any
// extraction error.
func extractRunners(payloadFS fs.FS) (string, error) {
	// Clear out tmpdirs left behind by prior runs before making a new one
	cleanupTmpDirs()

	parent, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
	if err != nil {
		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
	}

	// Record our pid so orphaned tmpdirs can be recognized and cleaned up
	pidFile := filepath.Join(parent, "ollama.pid")
	pid := strconv.Itoa(os.Getpid())
	if werr := os.WriteFile(pidFile, []byte(pid), 0o644); werr != nil {
		slog.Warn("failed to write pid file", "file", pidFile, "error", werr)
	}

	// Payloads get their own subdirectory inside the tmpdir,
	// typically something like /tmp/ollama3208993108/runners on linux
	runnersPath := filepath.Join(parent, "runners")
	slog.Info("extracting embedded files", "dir", runnersPath)
	if err := refreshRunners(payloadFS, runnersPath); err != nil {
		return runnersPath, err
	}
	return runnersPath, nil
}
// refreshRunners extracts the runner payloads in payloadFS that match
// binGlob into rDir. The underlying error is wrapped with %w so callers can
// still inspect it with errors.Is / errors.As.
func refreshRunners(payloadFS fs.FS, rDir string) error {
	// extract or refresh server libraries
	if err := extractFiles(payloadFS, rDir, binGlob); err != nil {
		return fmt.Errorf("extract binaries: %w", err)
	}
	return nil
}
// extractFiles extracts the embedded files matching glob from payloadFS into
// targetDir.
//
// Matches are expected to be laid out as $OS/$GOARCH/$RUNNER/$FILE. Each one
// is written to targetDir/$RUNNER/$FILE, with a ".gz" suffix stripped and the
// content decompressed. Files already present at the destination are left
// as-is, so the same call serves both the initial extract and a refresh.
//
// It returns an error when the glob fails or matches nothing. If any file
// fails to extract, targetDir is removed (best effort) and the extraction
// error is returned.
func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
	files, err := fs.Glob(payloadFS, glob)
	if err != nil || len(files) == 0 {
		// Should not happen
		return fmt.Errorf("extractFiles called without payload present")
	}

	if err := os.MkdirAll(targetDir, 0o755); err != nil {
		return fmt.Errorf("extractFiles could not mkdir %s: %w", targetDir, err)
	}

	g := new(errgroup.Group)

	// $OS/$GOARCH/$RUNNER/$FILE
	for _, file := range files {
		filename := file // per-iteration copy captured by the goroutine below

		runner := filepath.Base(filepath.Dir(filename))

		slog.Debug("extracting", "runner", runner, "payload", filename)

		g.Go(func() error {
			srcf, err := payloadFS.Open(filename)
			if err != nil {
				return err
			}
			defer srcf.Close()

			src := io.Reader(srcf)
			if strings.HasSuffix(filename, ".gz") {
				gz, err := gzip.NewReader(src)
				if err != nil {
					return fmt.Errorf("decompress payload %s: %w", filename, err)
				}
				defer gz.Close()
				src = gz

				filename = strings.TrimSuffix(filename, ".gz")
			}

			runnerDir := filepath.Join(targetDir, runner)
			if err := os.MkdirAll(runnerDir, 0o755); err != nil {
				return fmt.Errorf("extractFiles could not mkdir %s: %w", runnerDir, err)
			}

			base := filepath.Base(filename)
			destFilename := filepath.Join(runnerDir, base)

			_, err = os.Stat(destFilename)
			switch {
			case errors.Is(err, os.ErrNotExist):
				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
				if err != nil {
					return fmt.Errorf("write payload %s: %w", filename, err)
				}
				if _, err := io.Copy(destFile, src); err != nil {
					// Best effort; the copy error is the one worth reporting
					destFile.Close()
					return fmt.Errorf("copy payload %s: %w", filename, err)
				}
				// A failed Close can mean data never reached disk, so report
				// it instead of leaving a truncated binary behind silently
				if err := destFile.Close(); err != nil {
					return fmt.Errorf("close payload %s: %w", filename, err)
				}
			case err != nil:
				return fmt.Errorf("stat payload %s: %w", filename, err)
			}
			return nil
		})
	}

	if err := g.Wait(); err != nil {
		slog.Error("failed to extract files", "error", err)
		// If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted.
		// The cleanup error gets its own name so the extraction error is the one returned.
		if rmErr := os.RemoveAll(targetDir); rmErr != nil {
			slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", rmErr)
		}
		return err
	}
	return nil
}
// binary names may contain an optional variant separated by '_'
// For example, "ollama_rocm_v6" and "ollama_rocm_v5" or "ollama_cpu" and "ollama_cpu_avx2"
// Any library without a variant is the lowest common denominator
func getAvailableServers() map[string]string {
payloadsDir, err := gpu.PayloadsDir()
// cleanupTmpDirs makes a best-effort pass over the temp directory and removes
// what prior ollama processes left behind. Candidates are found through an
// "ollama.pid" file inside an "ollama*" directory; a candidate is skipped when
// its pid file cannot be read or parsed, or when the recorded process still
// appears to be running. Every failure is logged and never returned.
func cleanupTmpDirs() {
	base := envconfig.TmpDir()
	if base == "" {
		base = os.TempDir()
	}

	pidFiles, err := filepath.Glob(filepath.Join(base, "ollama*", "ollama.pid"))
	if err != nil {
		slog.Error("payload lookup error", "error", err)
		return
	}

	for _, pidFile := range pidFiles {
		data, err := os.ReadFile(pidFile)
		switch {
		case errors.Is(err, os.ErrNotExist):
			slog.Debug("not a ollama runtime directory, skipping", "path", pidFile)
			continue
		case err != nil:
			slog.Warn("could not read ollama.pid, skipping", "path", pidFile, "error", err)
			continue
		}

		pid, err := strconv.Atoi(string(data))
		if err != nil {
			slog.Warn("invalid pid, skipping", "path", pidFile, "error", err)
			continue
		}

		// Probe the recorded pid with signal 0; any result other than
		// os.ErrProcessDone is treated as "still running"
		if proc, err := os.FindProcess(pid); err == nil {
			if sigErr := proc.Signal(syscall.Signal(0)); !errors.Is(sigErr, os.ErrProcessDone) {
				slog.Warn("process still running, skipping", "pid", pid, "path", pidFile)
				continue
			}
		}

		// Remove the pid file, then the runners tree, then the directory itself
		if err := os.Remove(pidFile); err != nil {
			slog.Warn("could not cleanup stale pidfile", "path", pidFile, "error", err)
		}

		parent := filepath.Dir(pidFile)
		staleRunners := filepath.Join(parent, "runners")
		if err := os.RemoveAll(staleRunners); err != nil {
			slog.Warn("could not cleanup stale runners", "path", staleRunners, "error", err)
		}

		if err := os.Remove(parent); err != nil {
			slog.Warn("could not cleanup stale tmpdir", "path", parent, "error", err)
		}
	}
}
// directory names are the name of the runner and may contain an optional
// variant prefixed with '_' as the separator. For example, "cuda_v11" and
// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
// lowest common denominator
func GetAvailableServers(payloadsDir string) map[string]string {
if payloadsDir == "" {
slog.Error("empty runner dir")
return nil
}
......@@ -78,9 +301,9 @@ func getAvailableServers() map[string]string {
// serversForGpu returns a list of compatible servers given the provided GPU
// info, ordered by performance. assumes Init() has been called
// TODO - switch to metadata based mapping
func serversForGpu(info gpu.GpuInfo) []string {
func ServersForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_
availableServers := getAvailableServers()
availableServers := GetAvailableServers(runnersDir)
requested := info.Library
if info.Variant != gpu.CPUCapabilityNone.String() {
requested += "_" + info.Variant
......@@ -144,12 +367,12 @@ func serversForGpu(info gpu.GpuInfo) []string {
}
// Return the optimal server for this CPU architecture
func serverForCpu() string {
func ServerForCpu() string {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
return "metal"
}
variant := gpu.GetCPUCapability()
availableServers := getAvailableServers()
availableServers := GetAvailableServers(runnersDir)
if variant != gpu.CPUCapabilityNone {
for cmp := range availableServers {
if cmp == "cpu_"+variant.String() {
......@@ -159,75 +382,3 @@ func serverForCpu() string {
}
return "cpu"
}
// extractFiles extracts the embedded files matching glob from libEmbed into
// targetDir.
//
// Matches are expected to be laid out as
// build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE. Each one is written to
// targetDir/$VARIANT/$FILE, with a ".gz" suffix stripped and the content
// decompressed. Files already present at the destination are left as-is.
//
// It returns errPayloadMissing when the glob fails or matches nothing. If any
// file fails to extract, gpu.Cleanup is called and the error is returned.
func extractFiles(targetDir string, glob string) error {
	files, err := fs.Glob(libEmbed, glob)
	if err != nil || len(files) == 0 {
		return errPayloadMissing
	}

	if err := os.MkdirAll(targetDir, 0o755); err != nil {
		return fmt.Errorf("extractFiles could not mkdir %s: %w", targetDir, err)
	}

	g := new(errgroup.Group)

	// build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE
	for _, file := range files {
		filename := file // per-iteration copy captured by the goroutine below

		variant := filepath.Base(filepath.Dir(filepath.Dir(filename)))

		slog.Debug("extracting", "variant", variant, "file", filename)

		g.Go(func() error {
			srcf, err := libEmbed.Open(filename)
			if err != nil {
				return err
			}
			defer srcf.Close()

			src := io.Reader(srcf)
			if strings.HasSuffix(filename, ".gz") {
				gz, err := gzip.NewReader(src)
				if err != nil {
					return fmt.Errorf("decompress payload %s: %w", filename, err)
				}
				defer gz.Close()
				src = gz

				filename = strings.TrimSuffix(filename, ".gz")
			}

			variantDir := filepath.Join(targetDir, variant)
			if err := os.MkdirAll(variantDir, 0o755); err != nil {
				return fmt.Errorf("extractFiles could not mkdir %s: %w", variantDir, err)
			}

			base := filepath.Base(filename)
			destFilename := filepath.Join(variantDir, base)

			_, err = os.Stat(destFilename)
			switch {
			case errors.Is(err, os.ErrNotExist):
				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
				if err != nil {
					return fmt.Errorf("write payload %s: %w", filename, err)
				}
				if _, err := io.Copy(destFile, src); err != nil {
					// Best effort; the copy error is the one worth reporting
					destFile.Close()
					return fmt.Errorf("copy payload %s: %w", filename, err)
				}
				// A failed Close can mean data never reached disk, so report
				// it instead of leaving a truncated binary behind silently
				if err := destFile.Close(); err != nil {
					return fmt.Errorf("close payload %s: %w", filename, err)
				}
			case err != nil:
				return fmt.Errorf("stat payload %s: %w", filename, err)
			}
			return nil
		})
	}

	if err := g.Wait(); err != nil {
		// If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
		gpu.Cleanup()
		return err
	}
	return nil
}
package runners
import (
"log/slog"
"os"
"path"
"runtime"
"strings"
"testing"
"testing/fstest"
)
// TestRefreshRunners extracts a one-file payload through Refresh, checks that
// the result lands under OLLAMA_TMPDIR and is discoverable, then extracts a
// second time via extractRunners and exercises both cleanup paths.
func TestRefreshRunners(t *testing.T) {
	slog.SetLogLoggerLevel(slog.LevelDebug)

	payloadFS := fstest.MapFS{
		path.Join(runtime.GOOS, runtime.GOARCH, "foo", "ollama_llama_server"): {Data: []byte("hello, world\n")},
	}
	tmpDir, err := os.MkdirTemp("", "testing")
	if err != nil {
		t.Fatalf("failed to make tmp dir %s", err)
	}
	// Neither cleanupTmpDirs nor Cleanup removes tmpDir itself, and the second
	// extraction below is skipped by cleanupTmpDirs because its pid file names
	// this still-running process. Drop the whole tree when the test ends.
	t.Cleanup(func() {
		if err := os.RemoveAll(tmpDir); err != nil {
			t.Logf("failed to remove tmp dir %s: %v", tmpDir, err)
		}
	})
	t.Setenv("OLLAMA_TMPDIR", tmpDir)

	rDir, err := Refresh(payloadFS)
	if err != nil {
		t.Fatalf("failed to extract to %s %s", tmpDir, err)
	}
	if !strings.Contains(rDir, tmpDir) {
		t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir)
	}

	// spot check results
	servers := GetAvailableServers(rDir)
	if len(servers) < 1 {
		t.Fatalf("expected at least 1 server")
	}

	// Extract a second time into a fresh directory
	rDir, err = extractRunners(payloadFS)
	if err != nil {
		t.Fatalf("failed to extract to %s %s", tmpDir, err)
	}
	if !strings.Contains(rDir, tmpDir) {
		t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir)
	}

	cleanupTmpDirs()

	Cleanup(payloadFS)
}
......@@ -2,8 +2,7 @@
set -e
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
. $(dirname $0)/env.sh
mkdir -p dist
......
......@@ -2,76 +2,34 @@
set -eu
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
# We use 2 different image repositories to handle combining architecture images into multiarch manifest
# (The ROCm image is x86 only and is not a multiarch manifest)
# For developers, you can override the DOCKER_ORG to generate multiarch manifests
# DOCKER_ORG=jdoe PUSH=1 ./scripts/build_docker.sh
DOCKER_ORG=${DOCKER_ORG:-"ollama"}
RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
. $(dirname $0)/env.sh
# Set PUSH to a non-empty string to trigger push instead of load
PUSH=${PUSH:-""}
# In CI mode, we break things down
OLLAMA_SKIP_MANIFEST_CREATE=${OLLAMA_SKIP_MANIFEST_CREATE:-""}
OLLAMA_SKIP_IMAGE_BUILD=${OLLAMA_SKIP_IMAGE_BUILD:-""}
if [ -z "${PUSH}" ] ; then
echo "Building ${FINAL_IMAGE_REPO}:$VERSION locally. set PUSH=1 to push"
LOAD_OR_PUSH="--load"
else
echo "Will be pushing ${RELEASE_IMAGE_REPO}:$VERSION for ${BUILD_ARCH}"
echo "Will be pushing ${FINAL_IMAGE_REPO}:$VERSION"
LOAD_OR_PUSH="--push"
fi
if [ -z "${OLLAMA_SKIP_IMAGE_BUILD}" ]; then
for TARGETARCH in ${BUILD_ARCH}; do
docker build \
${LOAD_OR_PUSH} \
--platform=linux/${TARGETARCH} \
--build-arg=VERSION \
--build-arg=GOFLAGS \
-f Dockerfile \
-t ${RELEASE_IMAGE_REPO}:$VERSION-${TARGETARCH} \
.
done
if echo ${BUILD_ARCH} | grep "amd64" > /dev/null; then
docker build \
${LOAD_OR_PUSH} \
--platform=linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--target runtime-rocm \
-f Dockerfile \
-t ${RELEASE_IMAGE_REPO}:$VERSION-rocm \
.
fi
fi
if [ -z "${OLLAMA_SKIP_MANIFEST_CREATE}" ]; then
if [ -n "${PUSH}" ]; then
docker manifest create ${FINAL_IMAGE_REPO}:$VERSION \
${RELEASE_IMAGE_REPO}:$VERSION-amd64 \
${RELEASE_IMAGE_REPO}:$VERSION-arm64
docker manifest push ${FINAL_IMAGE_REPO}:$VERSION
# For symmetry, tag/push the rocm image
if [ "${RELEASE_IMAGE_REPO}" != "${FINAL_IMAGE_REPO}" ]; then
echo "Tagging and pushing rocm image"
docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm
docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:$VERSION-rocm
docker push ${FINAL_IMAGE_REPO}:$VERSION-rocm
fi
else
echo "Skipping manifest generation when not pushing images are available locally as "
echo " ${RELEASE_IMAGE_REPO}:$VERSION-amd64"
echo " ${RELEASE_IMAGE_REPO}:$VERSION-arm64"
echo " ${RELEASE_IMAGE_REPO}:$VERSION-rocm"
fi
fi
docker buildx build \
${LOAD_OR_PUSH} \
--platform=${PLATFORM} \
${OLLAMA_COMMON_BUILD_ARGS} \
-f Dockerfile \
-t ${FINAL_IMAGE_REPO}:$VERSION \
.
if echo $PLATFORM | grep "amd64" > /dev/null; then
docker buildx build \
${LOAD_OR_PUSH} \
--platform=linux/amd64 \
${OLLAMA_COMMON_BUILD_ARGS} \
--target runtime-rocm \
-f Dockerfile \
-t ${FINAL_IMAGE_REPO}:$VERSION-rocm \
.
fi
\ No newline at end of file
#!/bin/sh
#
# Mac ARM users, rosetta can be flaky, so to use a remote x86 builder
#
# docker context create amd64 --docker host=ssh://mybuildhost
# docker buildx create --name mybuilder amd64 --platform linux/amd64
# docker buildx create --name mybuilder --append desktop-linux --platform linux/arm64
# docker buildx use mybuilder
set -eu
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
GZIP=$(which pigz 2>/dev/null || echo "gzip")
. $(dirname $0)/env.sh
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
mkdir -p dist
for TARGETARCH in ${BUILD_ARCH}; do
docker build \
--platform=linux/$TARGETARCH \
--build-arg=GOFLAGS \
--build-arg=CGO_CFLAGS \
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
--build-arg=AMDGPU_TARGETS \
--target build-$TARGETARCH \
docker buildx build \
--output type=local,dest=./dist/ \
--platform=${PLATFORM} \
${OLLAMA_COMMON_BUILD_ARGS} \
--target dist \
-f Dockerfile \
-t builder:$TARGETARCH \
.
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
rm -rf ./dist/linux-$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
if echo ${TARGETARCH} | grep "amd64" > /dev/null; then
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH-rocm ./dist
fi
docker rm builder-$TARGETARCH
echo "Compressing final linux bundle..."
rm -f ./dist/ollama-linux-$TARGETARCH.tgz
(cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz )
if [ -d dist/linux-$TARGETARCH-rocm ]; then
(cd dist/linux-$TARGETARCH-rocm && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH-rocm.tgz )
fi
done
# buildx behavior changes for single vs. multiplatform
if echo $PLATFORM | grep "," > /dev/null ; then
mv -f ./dist/linux_*64/ollama* ./dist/
rmdir ./dist/linux_*64
fi
\ No newline at end of file
# Common environment setup across build*.sh scripts
# The build scripts pull this in with `. $(dirname $0)/env.sh`.
# Every setting below except GOFLAGS and OLLAMA_COMMON_BUILD_ARGS keeps a value already present in the environment.
# Version string: derived from `git describe` with a leading "v" stripped when VERSION is not set
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
# Go linker flags: stamp version.Version with $VERSION and set server.mode to "release"
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
# TODO - consider `docker buildx ls --format=json` to autodiscover platform capability
# Comma-separated platform list passed to `docker buildx build --platform`
PLATFORM=${PLATFORM:-"linux/arm64,linux/amd64"}
# Image repository names, both derived from DOCKER_ORG unless set explicitly
DOCKER_ORG=${DOCKER_ORG:-"ollama"}
RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
# --build-arg flags shared by the `docker buildx build` invocations in the build scripts
OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION --build-arg=GOFLAGS --build-arg=OLLAMA_CUSTOM_CPU_DEFS --build-arg=AMDGPU_TARGETS"
# Print the effective settings
echo "Building Ollama"
echo "VERSION=$VERSION"
echo "PLATFORM=$PLATFORM"
\ No newline at end of file
......@@ -26,11 +26,13 @@ import (
"golang.org/x/sync/errgroup"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/build"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/openai"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/runners"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
......@@ -1216,12 +1218,12 @@ func Serve(ln net.Listener) error {
srvr.Close()
schedDone()
sched.unloadAllRunners()
gpu.Cleanup()
runners.Cleanup(build.EmbedFS)
done()
}()
if err := llm.Init(); err != nil {
return fmt.Errorf("unable to initialize llm library %w", err)
if _, err := runners.Refresh(build.EmbedFS); err != nil {
return fmt.Errorf("unable to initialize llm runners %w", err)
}
s.sched.Run(schedCtx)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment