Commit 0cb78a2f authored by xuxzh1

update

parent 217903ab
@@ -3,7 +3,7 @@ ollama
 app
 macapp
 dist
-llm/llama.cpp
 .env
 .cache
 test_data
+llama/build

-llm/ext_server/* linguist-vendored
+llama/**/*.cpp linguist-vendored
+llama/**/*.hpp linguist-vendored
+llama/**/*.h linguist-vendored
+llama/**/*.c linguist-vendored
+llama/**/*.cu linguist-vendored
+llama/**/*.cuh linguist-vendored
+llama/**/*.m linguist-vendored
+llama/**/*.metal linguist-vendored
 * text=auto
 *.go text eol=lf

@@ -5,11 +5,14 @@
 .swp
 dist
 ollama
-ggml-metal.metal
 .cache
 *.exe
 .idea
 test_data
 *.crt
 llm/build
-__debug_bin*
\ No newline at end of file
+build/*/*/*
+!build/**/placeholder
+llama/build
+__debug_bin*
+llama/vendor
\ No newline at end of file
@@ -24,7 +24,6 @@ linters:
     - nosprintfhostport
     - staticcheck
     - tenv
-    - testifylint
     - unconvert
     - unused
     - usestdlibvars
@@ -33,6 +32,10 @@ linters:
 linters-settings:
   gci:
     sections: [standard, default, localmodule]
+  staticcheck:
+    checks:
+      - all
+      - -SA1019 # omit Deprecated check
 severity:
   default-severity: error
   rules:
...
# Contributing to Ollama
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
## Set up
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
## Pull requests
### Ideal issues
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
### Issues that are harder to review
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates that fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
### Issues that may not be accepted
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
* Changes that add significant friction to the user experience
* Changes that create a large future maintenance burden for maintainers and contributors
### Best practices
* Commit messages: please include both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that identifies the section of the code being changed (e.g. `api: fix parsing of prompt field`); see the example below. In the description, add 2-3 short sentences that explain more about the change and its impact.
* Tests: please add test coverage to changes where possible.
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
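For instance, a commit message following these guidelines might look like this (an illustrative example, not taken from the repository's history):

```
api: fix parsing of prompt field

Previously the prompt field could be dropped when decoding requests with
an empty options object. Decode the field explicitly so prompts are
preserved, and add a regression test covering the empty-options case.
```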
## Need help?
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
-ARG GOLANG_VERSION=1.22.5
+ARG GOLANG_VERSION=1.22.8
 ARG CMAKE_VERSION=3.22.1
-# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
-ARG CUDA_VERSION=11.3.1
+ARG CUDA_VERSION_11=11.3.1
+ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
+ARG CUDA_VERSION_12=12.4.0
+ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
 ARG ROCM_VERSION=6.1.2
+ARG JETPACK_6=r36.2.0
+ARG JETPACK_5=r35.4.1

-# Copy the minimal context we need to run the generate scripts
-FROM scratch AS llm-code
-COPY .git .git
-COPY .gitmodules .gitmodules
-COPY llm llm
-
-FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
+### To create a local image for building linux binaries on mac or windows with efficient incremental builds
+#
+# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
+# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
+#
+### Then incremental builds will be much faster in this container
+#
+# make -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
+#
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
 ARG CMAKE_VERSION
+ARG GOLANG_VERSION
+ARG CUDA_VERSION_11
+ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-ARG CGO_CFLAGS
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
+ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
+RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
+RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
+    dnf clean all && \
+    dnf install -y \
+    zsh \
+    cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
+    cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
+# TODO intel oneapi goes here...
+ENV GOARCH amd64
+ENV CGO_ENABLED 1
+WORKDIR /go/src/github.com/ollama/ollama/
+ENTRYPOINT [ "zsh" ]

-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
+### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
+# Note: this does not contain jetson variants
+#
+# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
+# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
+#
+FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
 ARG CMAKE_VERSION
+ARG GOLANG_VERSION
+ARG CUDA_VERSION_11
+ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
+RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
+    dnf config-manager --set-enabled appstream && \
+    dnf clean all && \
+    dnf install -y \
+    zsh \
+    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
+    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
+ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
+ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
+ENV GOARCH amd64
+ENV CGO_ENABLED 1
+WORKDIR /go/src/github.com/ollama/ollama/
+ENTRYPOINT [ "zsh" ]

+FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
+COPY . .
+ARG OLLAMA_SKIP_CUDA_GENERATE
+ARG OLLAMA_SKIP_CUDA_11_GENERATE
+ARG OLLAMA_SKIP_CUDA_12_GENERATE
+ARG OLLAMA_SKIP_ROCM_GENERATE
+ARG CUDA_V11_ARCHITECTURES
+ARG CUDA_V12_ARCHITECTURES
+ARG OLLAMA_FAST_BUILD
+RUN --mount=type=cache,target=/root/.ccache \
+    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
+        make -j $(expr $(nproc) / 2 ) ; \
+    else \
+        make -j 5 ; \
+    fi

+FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
+COPY . .
+ARG OLLAMA_SKIP_CUDA_GENERATE
+ARG OLLAMA_SKIP_CUDA_11_GENERATE
+ARG OLLAMA_SKIP_CUDA_12_GENERATE
+ARG CUDA_V11_ARCHITECTURES
+ARG CUDA_V12_ARCHITECTURES
+ARG OLLAMA_FAST_BUILD
+RUN --mount=type=cache,target=/root/.ccache \
+    make -j 5

+# Jetsons need to be built in discrete stages
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
+ARG GOLANG_VERSION
+RUN apt-get update && apt-get install -y git curl ccache && \
+    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
+    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
+    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+WORKDIR /go/src/github.com/ollama/ollama/
+COPY . .
 ARG CGO_CFLAGS
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+ENV GOARCH arm64
+RUN --mount=type=cache,target=/root/.ccache \
+    make -j 5 cuda_v11 \
+        CUDA_ARCHITECTURES="72;87" \
+        GPU_RUNNER_VARIANT=_jetpack5 \
+        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
+        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
+        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5

-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
-ARG CMAKE_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV LIBRARY_PATH /opt/amdgpu/lib64
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
+ARG GOLANG_VERSION
+RUN apt-get update && apt-get install -y git curl ccache && \
+    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
+    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
+    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+WORKDIR /go/src/github.com/ollama/ollama/
+COPY . .
 ARG CGO_CFLAGS
-ARG AMDGPU_TARGETS
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
-RUN mkdir /tmp/scratch && \
-    for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
-        cp ${dep} /tmp/scratch/ || exit 1 ; \
-    done && \
-    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
-    mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
-    (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
+ENV GOARCH arm64
+RUN --mount=type=cache,target=/root/.ccache \
+    make -j 5 cuda_v12 \
+        CUDA_ARCHITECTURES="87" \
+        GPU_RUNNER_VARIANT=_jetpack6 \
+        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
+        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
+        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6

+# Intermediate stages used for ./scripts/build_linux.sh
-FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
+FROM --platform=linux/amd64 centos:7 AS builder-amd64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-ARG OLLAMA_CUSTOM_CPU_DEFS
+ENV CGO_ENABLED 1
+ENV GOARCH amd64
+WORKDIR /go/src/github.com/ollama/ollama

+FROM --platform=linux/amd64 builder-amd64 AS build-amd64
+COPY . .
+COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
+ARG GOFLAGS
 ARG CGO_CFLAGS
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-
-FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
-RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
-FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
-FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
-FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
+ARG OLLAMA_SKIP_ROCM_GENERATE
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-amd64/bin/ollama .
+RUN cd dist/linux-$GOARCH && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
+    cd dist/linux-$GOARCH-rocm && \
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
+    fi

-FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
+FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
-COPY --from=llm-code / /go/src/github.com/ollama/ollama/
-ARG OLLAMA_CUSTOM_CPU_DEFS
+ENV CGO_ENABLED 1
+ENV GOARCH arm64
+WORKDIR /go/src/github.com/ollama/ollama

+FROM --platform=linux/arm64 builder-arm64 AS build-arm64
+COPY . .
+COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/build/ build/
+ARG GOFLAGS
 ARG CGO_CFLAGS
-WORKDIR /go/src/github.com/ollama/ollama/llm/generate
-
-FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
-RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
-FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
-RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-arm64/bin/ollama .
+RUN cd dist/linux-$GOARCH && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+RUN cd dist/linux-$GOARCH-jetpack5 && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
+RUN cd dist/linux-$GOARCH-jetpack6 && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz

+FROM --platform=linux/amd64 scratch AS dist-amd64
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
+FROM --platform=linux/arm64 scratch AS dist-arm64
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
+FROM dist-$TARGETARCH AS dist

-# Intermediate stage used for ./scripts/build_linux.sh
-FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
-ENV CGO_ENABLED 1
+# Optimized container images do not carry nested payloads
+FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
-COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN go build -trimpath .
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-amd64/bin/ollama .

-# Intermediate stage used for ./scripts/build_linux.sh
-FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
-ENV CGO_ENABLED 1
-ARG GOLANG_VERSION
+FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
-COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN go build -trimpath .
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-arm64/bin/ollama .

 # Runtime stages
+# For amd64 container images, filter out cuda/rocm to minimize size
+FROM runners-amd64 AS runners-cuda-amd64
+RUN rm -rf \
+    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
+    ./dist/linux-amd64/lib/ollama/runners/rocm*

+FROM runners-amd64 AS runners-rocm-amd64
+RUN rm -rf \
+    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
+    ./dist/linux-amd64/lib/ollama/libcu*.so* \
+    ./dist/linux-amd64/lib/ollama/runners/cuda*

-FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
-RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

-FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
-RUN apt-get update && apt-get install -y ca-certificates
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
+COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/

-# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
-RUN update-pciids
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+# ROCm libraries are larger so we keep them distinct from the CPU/CUDA image
+FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
+# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
+# across releases
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0
...
GOALS := $(or $(MAKECMDGOALS),all)
.PHONY: $(GOALS)
$(GOALS):
	$(MAKE) -C llama $@
\ No newline at end of file
@@ -6,13 +6,7 @@
 ## Installation

-### 1. Install using the Dockerfile
-
-Download the `v0.3.5` branch of this repository, then run the relevant `docker build xxxxx` commands (consult the documentation for details).
-
-If you run into a GPU-count detection error, see https://developer.hpccube.com/codes/OpenDAS/ollama/-/issues/1 ; the fix can also be applied in advance.
-
-### 2. Install by building from source (recommended)
+### 1. Install by building from source

 #### Environment setup
@@ -22,10 +16,9 @@
 docker run -i -t -d --device=/dev/kfd --privileged --network=host --device=/dev/dri --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v <project dir (absolute path)>:/home -v /opt/hyhal:/opt/hyhal:ro -v --group-add video --shm-size 16G --name {container name} {image ID}

 1. Download the source

-git clone -b 0.3.5 http://developer.sourcefind.cn/codes/OpenDAS/ollama.git --depth=1
+git clone -b 0.4.7 http://developer.sourcefind.cn/codes/OpenDAS/ollama.git --depth=1
 cd ollama

 2. Install dependencies
@@ -43,8 +36,9 @@
 Install Go

+wget https://go.dev/dl/go1.22.8.linux-amd64.tar.gz
 cd ../..
-tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz
+tar -C /usr/local -xzf go1.22.8.linux-amd64.tar.gz
 export PATH=$PATH:/usr/local/go/bin

 # Change the Go module download source for faster downloads (optional)
@@ -53,8 +47,9 @@
 ##### Build

-cd llm/generate && bash gen_linux.sh
-cd ../.. && go build
+export LD_LIBRARY_PATH=/path/to/ollama/llama/build/linux-amd64/runners/rocm:$LD_LIBRARY_PATH
+make -j 16
+go build .

 ## Verification
...
@@ -55,7 +55,7 @@ func checkError(resp *http.Response, body []byte) error {
 // ClientFromEnvironment creates a new [Client] using configuration from the
 // environment variable OLLAMA_HOST, which points to the network host and
-// port on which the ollama service is listenting. The format of this variable
+// port on which the ollama service is listening. The format of this variable
 // is:
 //
 //	<scheme>://<host>:<port>
@@ -298,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }

-// List running models.
+// ListRunning lists running models.
 func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
 	var lr ProcessResponse
 	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@@ -333,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
 	return &resp, nil
 }

-// Hearbeat checks if the server has started and is responsive; if yes, it
+// Heartbeat checks if the server has started and is responsive; if yes, it
 // returns nil, otherwise an error.
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
...
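Taken together, `ClientFromEnvironment`, `Heartbeat`, and `ListRunning` suggest a small end-to-end usage pattern. A minimal sketch, assuming a reachable server (OLLAMA_HOST defaults to 127.0.0.1:11434 when unset):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	// ClientFromEnvironment reads OLLAMA_HOST to locate the server.
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	ctx := context.Background()

	// Heartbeat returns nil once the server is up and responsive.
	if err := client.Heartbeat(ctx); err != nil {
		log.Fatal(err)
	}

	// ListRunning reports models currently loaded into memory (/api/ps).
	ps, err := client.ListRunning(ctx)
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range ps.Models {
		fmt.Println(m.Name)
	}
}
```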
@@ -12,7 +12,7 @@ import (
 	"time"
 )

-// StatusError is an error with and HTTP status code.
+// StatusError is an error with an HTTP status code and message.
 type StatusError struct {
 	StatusCode int
 	Status     string
@@ -57,7 +57,7 @@ type GenerateRequest struct {
 	Template string `json:"template"`

 	// Context is the context parameter returned from a previous call to
-	// Generate call. It can be used to keep a short conversational memory.
+	// [Client.Generate]. It can be used to keep a short conversational memory.
 	Context []int `json:"context,omitempty"`

 	// Stream specifies whether the response is streaming; it is true by default.
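The Context round-trip described above looks like this in practice. A sketch, assuming an `api.Client` as in the earlier client example; the model name and prompts are placeholders:

```go
// chat shows how Context carries a short conversational memory between
// two Generate calls.
func chat(ctx context.Context, client *api.Client) error {
	var saved []int
	first := &api.GenerateRequest{Model: "llama3.2", Prompt: "My name is Ada."}
	if err := client.Generate(ctx, first, func(r api.GenerateResponse) error {
		if r.Done {
			saved = r.Context // the returned context encodes the exchange so far
		}
		return nil
	}); err != nil {
		return err
	}

	second := &api.GenerateRequest{
		Model:   "llama3.2",
		Prompt:  "What is my name?",
		Context: saved, // resume from the previous exchange
	}
	return client.Generate(ctx, second, func(r api.GenerateResponse) error {
		fmt.Print(r.Response)
		return nil
	})
}
```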
@@ -90,14 +90,14 @@ type ChatRequest struct {
 	// Messages is the messages of the chat - can be used to keep a chat memory.
 	Messages []Message `json:"messages"`

-	// Stream enable streaming of returned response; true by default.
+	// Stream enables streaming of returned responses; true by default.
 	Stream *bool `json:"stream,omitempty"`

 	// Format is the format to return the response in (e.g. "json").
 	Format string `json:"format"`

 	// KeepAlive controls how long the model will stay loaded into memory
-	// followin the request.
+	// following the request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`

 	// Tools is an optional list of tools the model has access to.
@@ -146,6 +146,7 @@ type ToolCall struct {
 }

 type ToolCallFunction struct {
+	Index     int                       `json:"index,omitempty"`
 	Name      string                    `json:"name"`
 	Arguments ToolCallFunctionArguments `json:"arguments"`
 }
@@ -203,8 +204,8 @@ type Metrics struct {
 	EvalDuration time.Duration `json:"eval_duration,omitempty"`
 }

-// Options specified in [GenerateRequest], if you add a new option here add it
-// to the API docs also.
+// Options specified in [GenerateRequest]. If you add a new option here, also
+// add it to the API docs.
 type Options struct {
 	Runner
@@ -236,7 +237,7 @@ type Runner struct {
 	NumGPU    int   `json:"num_gpu,omitempty"`
 	MainGPU   int   `json:"main_gpu,omitempty"`
 	LowVRAM   bool  `json:"low_vram,omitempty"`
-	F16KV     bool  `json:"f16_kv,omitempty"`
+	F16KV     bool  `json:"f16_kv,omitempty"` // Deprecated: This option is ignored
 	LogitsAll bool  `json:"logits_all,omitempty"`
 	VocabOnly bool  `json:"vocab_only,omitempty"`
 	UseMMap   *bool `json:"use_mmap,omitempty"`
@@ -296,15 +297,17 @@ type EmbeddingResponse struct {
 // CreateRequest is the request passed to [Client.Create].
 type CreateRequest struct {
 	Model     string `json:"model"`
-	Path      string `json:"path"`
 	Modelfile string `json:"modelfile"`
 	Stream    *bool  `json:"stream,omitempty"`
 	Quantize  string `json:"quantize,omitempty"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`

-	// Quantization is deprecated, see Quantize
+	// Deprecated: set the file content with Modelfile instead
+	Path string `json:"path"`
+
+	// Deprecated: use Quantize instead
 	Quantization string `json:"quantization,omitempty"`
 }
@@ -312,7 +315,7 @@ type CreateRequest struct {
 type DeleteRequest struct {
 	Model string `json:"model"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }
@@ -327,7 +330,7 @@ type ShowRequest struct {
 	Options map[string]interface{} `json:"options"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }
@@ -359,7 +362,7 @@ type PullRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }
@@ -380,7 +383,7 @@ type PushRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Name is deprecated, see Model
+	// Deprecated: set the model name with Model instead
 	Name string `json:"name"`
 }
@@ -611,7 +614,6 @@ func DefaultOptions() Options {
 		NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
 		NumThread: 0,  // let the runtime decide
 		LowVRAM:   false,
-		F16KV:     true,
 		UseMLock:  false,
 		UseMMap:   nil,
 	},
...
@@ -11,10 +11,12 @@ import (

 	"github.com/ollama/ollama/app/store"
 	"github.com/ollama/ollama/app/tray"
+	"github.com/ollama/ollama/envconfig"
 )

 func Run() {
 	InitLogging()
+	slog.Info("app config", "env", envconfig.Values())

 	ctx, cancel := context.WithCancel(context.Background())
 	var done chan int
...
@@ -36,8 +36,13 @@ func init() {
 		ServerLogFile = filepath.Join(AppDataDir, "server.log")
 		UpgradeLogFile = filepath.Join(AppDataDir, "upgrade.log")

-		// Executables are stored in APPDATA
-		AppDir = filepath.Join(localAppData, "Programs", "Ollama")
+		exe, err := os.Executable()
+		if err != nil {
+			slog.Warn("error discovering executable directory", "error", err)
+			AppDir = filepath.Join(localAppData, "Programs", "Ollama")
+		} else {
+			AppDir = filepath.Dir(exe)
+		}

 		// Make sure we have PATH set correctly for any spawned children
 		paths := strings.Split(os.Getenv("PATH"), ";")
@@ -64,7 +69,7 @@ func init() {
 	}

 	// Make sure our logging dir exists
-	_, err := os.Stat(AppDataDir)
+	_, err = os.Stat(AppDataDir)
 	if errors.Is(err, os.ErrNotExist) {
 		if err := os.MkdirAll(AppDataDir, 0o755); err != nil {
 			slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
...
@@ -18,11 +18,17 @@ func getCLIFullPath(command string) string {
 	var cmdPath string
 	appExe, err := os.Executable()
 	if err == nil {
+		// Check both the same location as the tray app, as well as ./bin
 		cmdPath = filepath.Join(filepath.Dir(appExe), command)
 		_, err := os.Stat(cmdPath)
 		if err == nil {
 			return cmdPath
 		}
+		cmdPath = filepath.Join(filepath.Dir(appExe), "bin", command)
+		_, err = os.Stat(cmdPath)
+		if err == nil {
+			return cmdPath
+		}
 	}
 	cmdPath, err = exec.LookPath(command)
 	if err == nil {
...
@@ -26,19 +26,15 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 	slog.Info("starting upgrade with " + installerExe)
 	slog.Info("upgrade log file " + UpgradeLogFile)

-	// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
+	// make the upgrade show progress, but non interactive
 	installArgs := []string{
 		"/CLOSEAPPLICATIONS",                    // Quit the tray app if it's still running
 		"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
 		"/FORCECLOSEAPPLICATIONS",               // Force close the tray app - might be needed
-	}
-	// make the upgrade as quiet as possible (no GUI, no prompts)
-	installArgs = append(installArgs,
-		"/SP", // Skip the "This will install... Do you wish to continue" prompt
-		"/SUPPRESSMSGBOXES",
+		"/SP",       // Skip the "This will install... Do you wish to continue" prompt
+		"/NOCANCEL", // Disable the ability to cancel upgrade mid-flight to avoid partially installed upgrades
 		"/SILENT",
-		"/VERYSILENT",
-	)
+	}

 	// Safeguard in case we have requests in flight that need to drain...
 	slog.Info("Waiting for server to shutdown")
...
@@ -28,8 +28,8 @@ AppPublisher={#MyAppPublisher}
 AppPublisherURL={#MyAppURL}
 AppSupportURL={#MyAppURL}
 AppUpdatesURL={#MyAppURL}
-ArchitecturesAllowed=x64 arm64
-ArchitecturesInstallIn64BitMode=x64 arm64
+ArchitecturesAllowed=x64compatible arm64
+ArchitecturesInstallIn64BitMode=x64compatible arm64
 DefaultDirName={localappdata}\Programs\{#MyAppName}
 DefaultGroupName={#MyAppName}
 DisableProgramGroupPage=yes
@@ -48,12 +48,13 @@ OutputDir=..\dist\
 SetupLogging=yes
 CloseApplications=yes
 RestartApplications=no
+RestartIfNeededByRun=no

 ; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile
 WizardSmallImageFile=.\assets\setup.bmp

-; TODO verifty actual min windows version...
-; OG Win 10
+; Ollama requires Windows 10 22H2 or newer for proper unicode rendering
+; TODO: consider setting this to 10.0.19045
 MinVersion=10.0.10240

 ; First release that supports WinRT UI Composition for win32 apps
@@ -86,21 +87,21 @@ Name: "english"; MessagesFile: "compiler:Default.isl"
 DialogFontSize=12

 [Files]
-Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
-Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
-Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
-Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
-Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
-#if DirExists("..\dist\windows-amd64\cuda")
-  Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
-#endif
-#if DirExists("..\dist\windows-amd64\oneapi")
-  Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
-#endif
-#if DirExists("..\dist\windows-amd64\rocm")
-  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
-#endif
+#if DirExists("..\dist\windows-amd64")
+Source: "..\dist\windows-amd64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: not IsArm64(); Flags: ignoreversion 64bit
+Source: "..\dist\windows-amd64\ollama.exe"; DestDir: "{app}"; Check: not IsArm64(); Flags: ignoreversion 64bit
+Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: not IsArm64(); Flags: ignoreversion 64bit recursesubdirs
+#endif
+
+#if DirExists("..\dist\windows-arm64")
+Source: "..\dist\windows-arm64\vc_redist.arm64.exe"; DestDir: "{tmp}"; Check: IsArm64() and vc_redist_needed(); Flags: deleteafterinstall
+Source: "..\dist\windows-arm64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: IsArm64(); Flags: ignoreversion 64bit
+Source: "..\dist\windows-arm64\ollama.exe"; DestDir: "{app}"; Check: IsArm64(); Flags: ignoreversion 64bit
+Source: "..\dist\windows-arm64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: IsArm64(); Flags: ignoreversion 64bit recursesubdirs
+#endif
+
+Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
+Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion

 [Icons]
 Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@@ -108,6 +109,9 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
 Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"

 [Run]
+#if DirExists("..\dist\windows-arm64")
+Filename: "{tmp}\vc_redist.arm64.exe"; Parameters: "/install /passive /norestart"; Check: IsArm64() and vc_redist_needed(); StatusMsg: "Installing VC++ Redistributables..."; Flags: waituntilterminated
+#endif
 Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden

 [UninstallRun]
@@ -132,13 +136,13 @@ Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"

 [Messages]
-WizardReady=Ollama Windows Preview
+WizardReady=Ollama
 ReadyLabel1=%nLet's get you up and running with your own large language models.
 SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.

 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.1
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.2
 ;ClickFinish=%n

 [Registry]
@@ -163,3 +167,39 @@ begin
   { Pos() returns 0 if not found }
   Result := Pos(';' + ExpandConstant(Param) + ';', ';' + OrigPath + ';') = 0;
 end;
+
+{ --- VC Runtime libraries discovery code - Only install vc_redist if it isn't already installed ----- }
+const VCRTL_MIN_V1 = 14;
+const VCRTL_MIN_V2 = 40;
+const VCRTL_MIN_V3 = 33807;
+const VCRTL_MIN_V4 = 0;
+
+// check if the minimum required vc redist is installed (by looking at the registry)
+function vc_redist_needed (): Boolean;
+var
+  sRegKey: string;
+  v1: Cardinal;
+  v2: Cardinal;
+  v3: Cardinal;
+  v4: Cardinal;
+begin
+  sRegKey := 'SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\arm64';
+  if (RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Major', v1) and
+      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Minor', v2) and
+      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Bld', v3) and
+      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'RBld', v4)) then
+  begin
+    Log ('VC Redist version: ' + IntToStr (v1) +
+         '.' + IntToStr (v2) + '.' + IntToStr (v3) +
+         '.' + IntToStr (v4));
+    { Version info was found. Return true if later or equal to our
+      minimal required version RTL_MIN_Vx }
+    Result := not (
+      (v1 > VCRTL_MIN_V1) or ((v1 = VCRTL_MIN_V1) and
+      ((v2 > VCRTL_MIN_V2) or ((v2 = VCRTL_MIN_V2) and
+      ((v3 > VCRTL_MIN_V3) or ((v3 = VCRTL_MIN_V3) and
+      (v4 >= VCRTL_MIN_V4)))))));
+  end
+  else
+    Result := TRUE;
+end;
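The nested boolean above implements a four-part version comparison: the redistributable is needed unless the installed (Major, Minor, Bld, RBld) tuple is at least (14, 40, 33807, 0). An equivalent sketch in Go, with a hypothetical helper for illustration only:

```go
package main

import "fmt"

// versionAtLeast reports whether version a (major, minor, build, rebuild)
// is at least version b, comparing part by part.
func versionAtLeast(a, b [4]uint32) bool {
	for i := range a {
		if a[i] != b[i] {
			return a[i] > b[i]
		}
	}
	return true // all parts equal
}

func main() {
	minimum := [4]uint32{14, 40, 33807, 0}
	installed := [4]uint32{14, 38, 31938, 0} // example registry values
	// The installer's vc_redist_needed() is the negation of this check.
	fmt.Println("vc_redist needed:", !versionAtLeast(installed, minimum))
}
```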
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
 write-host ""
 write-host "Run your first model:"
 write-host ""
-write-host "`tollama run llama3.1"
+write-host "`tollama run llama3.2"
 write-host ""
\ No newline at end of file
@@ -64,7 +64,7 @@ func initStore() {
 		slog.Debug(fmt.Sprintf("unexpected error searching for store: %s", err))
 	}
 	slog.Debug("initializing new store")
-	store.ID = uuid.New().String()
+	store.ID = uuid.NewString()
 	writeStore(getStorePath())
 }
...
@@ -11,12 +11,13 @@ import (
 )

 const (
-	updatAvailableMenuID = 1
-	updateMenuID         = updatAvailableMenuID + 1
-	separatorMenuID      = updateMenuID + 1
-	diagLogsMenuID       = separatorMenuID + 1
-	diagSeparatorMenuID  = diagLogsMenuID + 1
-	quitMenuID           = diagSeparatorMenuID + 1
+	_ = iota
+	updateAvailableMenuID
+	updateMenuID
+	separatorMenuID
+	diagLogsMenuID
+	diagSeparatorMenuID
+	quitMenuID
 )

 func (t *winTray) initMenus() error {
@@ -35,10 +36,10 @@ func (t *winTray) initMenus() error {
 func (t *winTray) UpdateAvailable(ver string) error {
 	if !t.updateNotified {
 		slog.Debug("updating menu and sending notification for new update")
-		if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
+		if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
 			return fmt.Errorf("unable to create menu entries %w", err)
 		}
-		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
+		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenuTitle, false); err != nil {
 			return fmt.Errorf("unable to create menu entries %w", err)
 		}
 		if err := t.addSeparatorMenuItem(separatorMenuID, 0); err != nil {
...
@@ -10,6 +10,6 @@ const (
 	quitMenuTitle            = "Quit Ollama"
 	updateAvailableMenuTitle = "An update is available"
-	updateMenutTitle         = "Restart to update"
+	updateMenuTitle          = "Restart to update"
 	diagLogsMenuTitle        = "View logs"
 )
@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"sort"
 	"sync"
+	"syscall"
 	"unsafe"

 	"golang.org/x/sys/windows"
@@ -360,7 +361,7 @@ func (t *winTray) showMenu() error {
 	boolRet, _, err = pTrackPopupMenu.Call(
 		uintptr(t.menus[0]),
-		TPM_BOTTOMALIGN|TPM_LEFTALIGN,
+		TPM_BOTTOMALIGN|TPM_LEFTALIGN|TPM_RIGHTBUTTON,
 		uintptr(p.X),
 		uintptr(p.Y),
 		0,
@@ -433,7 +434,12 @@ func (t *winTray) setIcon(src string) error {
 	t.muNID.Lock()
 	defer t.muNID.Unlock()
 	t.nid.Icon = h
-	t.nid.Flags |= NIF_ICON
+	t.nid.Flags |= NIF_ICON | NIF_TIP
+	if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
+		copy(t.nid.Tip[:], toolTipUTF16)
+	} else {
+		return err
+	}
 	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))

 	return t.nid.modify()
...