"docs/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "e46ec5f88fec23870538df782258c59271b010fd"
Unverified Commit 20c5fd39 authored by Devon Rifkin, committed by GitHub

Merge branch 'main' into drifkin/array-head-count-simple

parents d2ee599d 6e9a7a25
@@ -103,11 +103,6 @@ jobs:
         arch: [amd64]
         preset: ['CPU']
         include:
-          - os: windows
-            arch: amd64
-            preset: 'CUDA 11'
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-            cuda-version: '11.3'
           - os: windows
             arch: amd64
             preset: 'CUDA 12'
@@ -324,7 +319,6 @@ jobs:
           case "$COMPONENT" in
             bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-            lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
             lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
             lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
@@ -432,6 +426,22 @@ jobs:
           docker buildx imagetools inspect ollama/ollama:${{ steps.metadata.outputs.version }}
         working-directory: ${{ runner.temp }}
 
+  # Trigger downstream release process
+  trigger:
+    runs-on: ubuntu-latest
+    environment: release
+    needs: [darwin-build, windows-build, windows-depends]
+    steps:
+      - name: Trigger downstream release process
+        run: |
+          curl -L \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.RELEASE_TOKEN }}" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            https://api.github.com/repos/ollama/${{ vars.RELEASE_REPO }}/dispatches \
+            -d "{\"event_type\": \"trigger-workflow\", \"client_payload\": {\"run_id\": \"${GITHUB_RUN_ID}\", \"version\": \"${GITHUB_REF_NAME#v}\"}}"
+
   # Aggregate all the assets and ship a release
   release:
     needs: [darwin-sign, windows-sign, linux-build]
......
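The new `trigger` job above is a plain `repository_dispatch` call against the GitHub API; the downstream repo picks it up with an `on: repository_dispatch: types: [trigger-workflow]` trigger. For reference, a minimal Go sketch of the same request; the target repo name and the token env var are placeholders standing in for `vars.RELEASE_REPO` and `secrets.RELEASE_TOKEN`:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
	"os"
)

// Mirrors the curl call in the trigger job: POST /repos/{owner}/{repo}/dispatches
// with an event_type the downstream workflow filters on, plus an arbitrary client_payload.
func main() {
	payload := []byte(`{"event_type": "trigger-workflow", "client_payload": {"run_id": "123", "version": "0.6.0"}}`)
	req, err := http.NewRequest(http.MethodPost,
		"https://api.github.com/repos/ollama/release-repo/dispatches", // repo name is a placeholder
		bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Accept", "application/vnd.github+json")
	req.Header.Set("Authorization", "Bearer "+os.Getenv("GITHUB_TOKEN")) // placeholder token source
	req.Header.Set("X-GitHub-Api-Version", "2022-11-28")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // GitHub returns 204 No Content on success
}
```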
@@ -46,7 +46,7 @@ jobs:
         include:
           - preset: CPU
           - preset: CUDA
-            container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+            container: nvidia/cuda:12.8.1-devel-ubuntu22.04
             flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
           - preset: ROCm
             container: rocm/dev-ubuntu-22.04:6.1.2
@@ -78,7 +78,7 @@ jobs:
         include:
           - preset: CPU
           - preset: CUDA
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
             flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
           - preset: ROCm
             install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
@@ -102,7 +102,7 @@ jobs:
           $ErrorActionPreference = "Stop"
           if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
             Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
+            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
           }
           $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
......
@@ -19,8 +19,8 @@ linters:
     - nolintlint
     - nosprintfhostport
     - staticcheck
-    - tenv
     - unconvert
+    - usetesting
    - wastedassign
    - whitespace
  disable:
......
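The linter swap above, deprecated `tenv` out and `usetesting` in, is what drives the test rewrites later in this commit: `usetesting` flags test code that uses process-level helpers where `testing` equivalents exist, e.g. `context.Background()` vs `t.Context()` and `os.MkdirTemp` vs `t.TempDir()`. A before/after sketch (Go 1.24+ for `t.Context()`):

```go
package example

import (
	"context"
	"os"
	"testing"
)

// Flagged by usetesting: manual context and temp-dir management in a test.
func TestBefore(t *testing.T) {
	ctx := context.Background()
	dir, err := os.MkdirTemp("", "example")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)
	_, _ = ctx, dir
}

// Preferred form: the testing package manages both lifetimes itself.
func TestAfter(t *testing.T) {
	ctx := t.Context() // cancelled automatically when the test finishes
	dir := t.TempDir() // removed automatically when the test finishes
	_, _ = ctx, dir
}
```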
@@ -17,14 +17,6 @@
       "name": "CUDA",
       "inherits": [ "Default" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
-        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets"
-      }
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
@@ -78,11 +70,6 @@
       "configurePreset": "CUDA",
       "targets": [ "ggml-cuda" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "configurePreset": "CUDA 11"
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
......
@@ -7,14 +7,10 @@ ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
 
-# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN yum install -y yum-utils \
-    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
-    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
-    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
+    && dnf install -y ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
-ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 
 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
@@ -38,15 +34,6 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --build --parallel --preset 'CPU' \
     && cmake --install build --component CPU --strip --parallel 8
 
-FROM base AS cuda-11
-ARG CUDA11VERSION=11.3
-RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
-ENV PATH=/usr/local/cuda-11/bin:$PATH
-RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'CUDA 11' \
-    && cmake --build --parallel --preset 'CUDA 11' \
-    && cmake --install build --component CUDA --strip --parallel 8
-
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
@@ -98,11 +85,9 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
     go build -trimpath -buildmode=pie -o /bin/ollama .
 
 FROM --platform=linux/amd64 scratch AS amd64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
 
 FROM --platform=linux/arm64 scratch AS arm64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
 COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
 COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
 COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
......
 UPSTREAM=https://github.com/ggerganov/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=2016f07bd106c73699ecbaace80f55db5ed95dac
+FETCH_HEAD=e1e8e0991ffd9e99a445c6812bb519d5bac9f4b5
 
 .PHONY: help
 help:
......
@@ -61,6 +61,8 @@ Here are some example models that can be downloaded:
 | QwQ | 32B | 20GB | `ollama run qwq` |
 | DeepSeek-R1 | 7B | 4.7GB | `ollama run deepseek-r1` |
 | DeepSeek-R1 | 671B | 404GB | `ollama run deepseek-r1:671b` |
+| Llama 4 | 109B | 67GB | `ollama run llama4:scout` |
+| Llama 4 | 400B | 245GB | `ollama run llama4:maverick` |
 | Llama 3.3 | 70B | 43GB | `ollama run llama3.3` |
 | Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` |
 | Llama 3.2 | 1B | 1.3GB | `ollama run llama3.2:1b` |
@@ -77,7 +79,7 @@ Here are some example models that can be downloaded:
 | Code Llama | 7B | 3.8GB | `ollama run codellama` |
 | Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
 | LLaVA | 7B | 4.5GB | `ollama run llava` |
-| Granite-3.2 | 8B | 4.9GB | `ollama run granite3.2` |
+| Granite-3.3 | 8B | 4.9GB | `ollama run granite3.3` |
 
 > [!NOTE]
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -285,7 +287,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
 - [HTML UI](https://github.com/rtcfirefly/ollama-ui)
 - [Saddle](https://github.com/jikkuatwork/saddle)
-- [TagSpaces](https://www.tagspaces.org) (A platform for file based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions)
+- [TagSpaces](https://www.tagspaces.org) (A platform for file-based apps, [utilizing Ollama](https://docs.tagspaces.org/ai/) for the generation of tags and descriptions)
 - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
 - [Chatbot UI v2](https://github.com/mckaywrigley/chatbot-ui)
 - [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
@@ -312,6 +314,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
 - [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)
+- [Jirapt](https://github.com/AliAhmedNada/jirapt) (Jira Integration to generate issues, tasks, epics)
 - [QA-Pilot](https://github.com/reid41/QA-Pilot) (Interactive chat tool that can leverage Ollama models for rapid understanding and navigation of GitHub code repositories)
 - [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
 - [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
@@ -325,14 +328,14 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [RWKV-Runner](https://github.com/josStorer/RWKV-Runner) (RWKV offline LLM deployment tool, also usable as a client for ChatGPT and Ollama)
 - [Ollama Grid Search](https://github.com/dezoito/ollama-grid-search) (app to evaluate and compare models)
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
-- [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support and multiple large language models.)
+- [Casibase](https://casibase.org) (An open source AI knowledge base and dialogue system combining the latest RAG, SSO, ollama support, and multiple large language models.)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
 - [Shinkai Desktop](https://github.com/dcSpark/shinkai-apps) (Two click install Local AI using Ollama + Files + RAG)
-- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in discord )
+- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [R2R](https://github.com/SciPhi-AI/R2R) (Open-source RAG engine)
-- [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy to use GUI with sample custom LLM for Drivers Education)
+- [Ollama-Kis](https://github.com/elearningshow/ollama-kis) (A simple easy-to-use GUI with sample custom LLM for Drivers Education)
 - [OpenGPA](https://opengpa.org) (Open-source offline-first Enterprise Agentic Application)
 - [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
@@ -341,16 +344,16 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
 - [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
 - [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
-- [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows and Mac)
+- [PyGPT](https://github.com/szczyglis-dev/py-gpt) (AI desktop assistant for Linux, Windows, and Mac)
-- [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for linux and macos made with GTK4 and Adwaita)
+- [Alpaca](https://github.com/Jeffser/Alpaca) (An Ollama client application for Linux and macOS made with GTK4 and Adwaita)
 - [AutoGPT](https://github.com/Significant-Gravitas/AutoGPT/blob/master/docs/content/platform/ollama.md) (AutoGPT Ollama integration)
 - [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang)
 - [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery)
-- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot and Ollama4j
+- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot, and Ollama4j
 - [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models.
 - [Cline](https://github.com/cline/cline) - Formerly known as Claude Dev is a VSCode extension for multi-file/whole-repo coding
 - [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
-- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
+- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption)
 - [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
 - [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
 - [Tkinter-based client](https://github.com/chyok/ollama-gui) (Python tkinter-based Client for Ollama)
@@ -368,7 +371,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [DualMind](https://github.com/tcsenpai/dualmind) (Experimental app allowing two models to talk to each other in the terminal or in a web interface)
 - [ollamarama-matrix](https://github.com/h1ddenpr0cess20/ollamarama-matrix) (Ollama chatbot for the Matrix chat protocol)
 - [ollama-chat-app](https://github.com/anan1213095357/ollama-chat-app) (Flutter-based chat app)
-- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard and said in the meetings)
+- [Perfect Memory AI](https://www.perfectmemory.ai/) (Productivity AI assists personalized by what you have seen on your screen, heard, and said in the meetings)
 - [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
 - [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)
 - [OpenTalkGpt](https://github.com/adarshM84/OpenTalkGpt) (Chrome Extension to manage open-source models supported by Ollama, create custom models, and chat with models from a user-friendly UI)
@@ -386,7 +389,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
 - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
 - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
-- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivent endpoint with Ollama support for running locally)
+- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
 - [AntSK](https://github.com/AIDotNet/AntSK) (Out-of-the-box & Adaptable RAG Chatbot)
 - [MaxKB](https://github.com/1Panel-dev/MaxKB/) (Ready-to-use & flexible RAG Chatbot)
 - [yla](https://github.com/danielekp/yla) (Web interface to freely interact with your customized models)
@@ -394,11 +397,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [1Panel](https://github.com/1Panel-dev/1Panel/) (Web-based Linux Server Management Tool)
 - [AstrBot](https://github.com/Soulter/AstrBot/) (User-friendly LLM-based multi-platform chatbot with a WebUI, supporting RAG, LLM agents, and plugins integration)
 - [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.)
+- [Flufy](https://github.com/Aharon-Bensadoun/Flufy) (A beautiful chat interface for interacting with Ollama's API. Built with React, TypeScript, and Material-UI.)
 - [Ellama](https://github.com/zeozeozeo/ellama) (Friendly native app to chat with an Ollama instance)
 - [screenpipe](https://github.com/mediar-ai/screenpipe) Build agents powered by your screen history
 - [Ollamb](https://github.com/hengkysteen/ollamb) (Simple yet rich in features, cross-platform built with Flutter and designed for Ollama. Try the [web demo](https://hengkysteen.github.io/demo/ollamb/).)
 - [Writeopia](https://github.com/Writeopia/Writeopia) (Text editor with integration with Ollama)
 - [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable)
+- [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)
 
 ### Cloud
@@ -440,7 +445,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [PowershAI](https://github.com/rrg92/powershai) PowerShell module that brings AI to terminal on Windows, including support for Ollama
 - [DeepShell](https://github.com/Abyss-c0re/deepshell) Your self-hosted AI assistant. Interactive Shell, Files and Folders analysis.
 - [orbiton](https://github.com/xyproto/orbiton) Configuration-free text editor and IDE with support for tab completion with Ollama.
-- [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull and download models from Ollama Registry in your terminal.
+- [orca-cli](https://github.com/molbal/orca-cli) Ollama Registry CLI Application - Browse, pull, and download models from Ollama Registry in your terminal.
 - [GGUF-to-Ollama](https://github.com/jonathanhecl/gguf-to-ollama) - Importing GGUF to Ollama made easy (multiplatform)
 
 ### Apple Vision Pro
@@ -468,7 +473,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Libraries
 
-- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
+- [LangChain](https://python.langchain.com/docs/integrations/chat/ollama/) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
 - [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
 - [crewAI](https://github.com/crewAIInc/crewAI)
 - [Yacana](https://remembersoftwares.github.io/yacana/) (User-friendly multi-agent framework for brainstorming and executing predetermined flows with built-in tool integration)
@@ -515,7 +520,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Swollama for Swift](https://github.com/marcusziade/Swollama) with [DocC](https://marcusziade.github.io/Swollama/documentation/swollama/)
 - [GoLamify](https://github.com/prasad89/golamify)
 - [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
-- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in unified API)
+- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in a unified API)
 - [LlmTornado](https://github.com/lofcz/llmtornado) (C# library providing a unified interface for major FOSS & Commercial inference APIs)
 - [Ollama for Zig](https://github.com/dravenk/ollama-zig)
 - [Abso](https://github.com/lunary-ai/abso) (OpenAI-compatible TypeScript SDK for any LLM provider)
@@ -524,11 +529,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Mobile
 
-- [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS and iPad)
+- [SwiftChat](https://github.com/aws-samples/swift-chat) (Lightning-fast Cross-platform AI chat app with native UI for Android, iOS, and iPad)
 - [Enchanted](https://github.com/AugustDev/enchanted)
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
 - [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
-- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
+- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy-focused LLM chat interface with optional encryption)
 - [Ollama Android Chat](https://github.com/sunshine0523/OllamaServer) (No need for Termux, start the Ollama service with one click on an Android device)
 - [Reins](https://github.com/ibrahimcetin/reins) (Easily tweak parameters, customize system prompts per chat, and enhance your AI experiments with reasoning model support.)
@@ -552,7 +557,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
-- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
+- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot like GitHub Copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
@@ -562,8 +567,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [ChatGPTBox: All in one browser extension](https://github.com/josStorer/chatGPTBox) with [Integrating Tutorial](https://github.com/josStorer/chatGPTBox/issues/616#issuecomment-1975186467)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
-- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
+- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server)
-- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front end Open WebUI service.)
+- [Terraform AWS Ollama & Open WebUI](https://github.com/xuyangbocn/terraform-aws-self-host-llm) (A Terraform module to deploy on AWS a ready-to-use Ollama service, together with its front-end Open WebUI service.)
 - [node-red-contrib-ollama](https://github.com/jakubburkiewicz/node-red-contrib-ollama)
 - [Local AI Helper](https://github.com/ivostoykov/localAI) (Chrome and Firefox extensions that enable interactions with the active tab and customisable API endpoints. Includes secure storage for user prompts.)
 - [vnc-lm](https://github.com/jake83741/vnc-lm) (Discord bot for messaging with LLMs through Ollama and LiteLLM. Seamlessly move between local and flagship models.)
......
 package api
 
 import (
-	"context"
 	"encoding/json"
 	"fmt"
 	"net/http"
@@ -137,7 +136,7 @@ func TestClientStream(t *testing.T) {
 			client := NewClient(&url.URL{Scheme: "http", Host: ts.Listener.Addr().String()}, http.DefaultClient)
 
 			var receivedChunks []ChatResponse
-			err := client.stream(context.Background(), http.MethodPost, "/v1/chat", nil, func(chunk []byte) error {
+			err := client.stream(t.Context(), http.MethodPost, "/v1/chat", nil, func(chunk []byte) error {
 				var resp ChatResponse
 				if err := json.Unmarshal(chunk, &resp); err != nil {
 					return fmt.Errorf("failed to unmarshal chunk: %w", err)
@@ -223,7 +222,7 @@ func TestClientDo(t *testing.T) {
 				ID      string `json:"id"`
 				Success bool   `json:"success"`
 			}
-			err := client.do(context.Background(), http.MethodPost, "/v1/messages", nil, &resp)
+			err := client.do(t.Context(), http.MethodPost, "/v1/messages", nil, &resp)
 			if tc.wantErr != "" {
 				if err == nil {
......
@@ -271,9 +271,6 @@ type Options struct {
 	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
 	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
 	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
-	Mirostat         int      `json:"mirostat,omitempty"`
-	MirostatTau      float32  `json:"mirostat_tau,omitempty"`
-	MirostatEta      float32  `json:"mirostat_eta,omitempty"`
 	Stop             []string `json:"stop,omitempty"`
 }
@@ -283,12 +280,7 @@ type Runner struct {
 	NumBatch  int   `json:"num_batch,omitempty"`
 	NumGPU    int   `json:"num_gpu,omitempty"`
 	MainGPU   int   `json:"main_gpu,omitempty"`
-	LowVRAM   bool  `json:"low_vram,omitempty"`
-	F16KV     bool  `json:"f16_kv,omitempty"` // Deprecated: This option is ignored
-	LogitsAll bool  `json:"logits_all,omitempty"`
-	VocabOnly bool  `json:"vocab_only,omitempty"`
 	UseMMap   *bool `json:"use_mmap,omitempty"`
-	UseMLock  bool  `json:"use_mlock,omitempty"`
 	NumThread int   `json:"num_thread,omitempty"`
 }
@@ -471,13 +463,6 @@ type ProcessModelResponse struct {
 	SizeVRAM  int64  `json:"size_vram"`
 }
 
-type RetrieveModelResponse struct {
-	Id      string `json:"id"`
-	Object  string `json:"object"`
-	Created int64  `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
-
 type TokenResponse struct {
 	Token string `json:"token"`
 }
@@ -660,9 +645,6 @@ func DefaultOptions() Options {
 		RepeatPenalty:    1.1,
 		PresencePenalty:  0.0,
 		FrequencyPenalty: 0.0,
-		Mirostat:         0,
-		MirostatTau:      5.0,
-		MirostatEta:      0.1,
 		Seed:             -1,
 
 		Runner: Runner{
@@ -671,8 +653,6 @@ func DefaultOptions() Options {
 			NumBatch:  512,
 			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
 			NumThread: 0,  // let the runtime decide
-			LowVRAM:   false,
-			UseMLock:  false,
 			UseMMap:   nil,
 		},
 	}
......
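A side note on the `Options` fields removed above (`mirostat`, `low_vram`, and friends): `encoding/json` ignores unknown keys on unmarshal, so older clients that still send them will not get an error; the values are simply dropped. A standalone sketch of that behavior, with a trimmed stand-in for `api.Options`:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Options is a cut-down stand-in for api.Options after the removal.
type Options struct {
	RepeatPenalty float32  `json:"repeat_penalty,omitempty"`
	Stop          []string `json:"stop,omitempty"`
}

func main() {
	// A request body from an older client that still sets mirostat options.
	body := []byte(`{"repeat_penalty": 1.1, "mirostat": 2, "mirostat_tau": 5.0}`)

	var opts Options
	if err := json.Unmarshal(body, &opts); err != nil {
		panic(err) // unknown keys do not cause an error
	}
	fmt.Printf("%+v\n", opts) // {RepeatPenalty:1.1 Stop:[]}
}
```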
@@ -78,7 +78,7 @@ func BenchmarkColdStart(b *testing.B) {
 	for _, tt := range tests {
 		b.Run(fmt.Sprintf("%s/cold/%s", m, tt.name), func(b *testing.B) {
-			ctx := context.Background()
+			ctx := b.Context()
 
 			// Set number of tokens as our throughput metric
 			b.SetBytes(int64(tt.maxTokens))
@@ -113,7 +113,7 @@ func BenchmarkWarmStart(b *testing.B) {
 	for _, tt := range tests {
 		b.Run(fmt.Sprintf("%s/warm/%s", m, tt.name), func(b *testing.B) {
-			ctx := context.Background()
+			ctx := b.Context()
 
 			// Pre-warm the model
 			warmup(client, m, tt.prompt, b)
@@ -140,7 +140,7 @@ func setup(b *testing.B) *api.Client {
 	if err != nil {
 		b.Fatal(err)
 	}
-	if _, err := client.Show(context.Background(), &api.ShowRequest{Model: modelName(b)}); err != nil {
+	if _, err := client.Show(b.Context(), &api.ShowRequest{Model: modelName(b)}); err != nil {
 		b.Fatalf("Model unavailable: %v", err)
 	}
......
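The benchmark hunks above also preserve the throughput trick noted in the source comments: `b.SetBytes` makes the benchmark framework report a per-byte rate, so setting it to the token budget turns that column into tokens per second. A standalone sketch with the generation call stubbed out:

```go
package example

import (
	"testing"
	"time"
)

// BenchmarkGenerate reports tokens/sec via b.SetBytes: each "byte"
// stands for one generated token, so the B/s column reads as tokens/s.
func BenchmarkGenerate(b *testing.B) {
	const maxTokens = 100
	b.SetBytes(int64(maxTokens))

	for i := 0; i < b.N; i++ {
		time.Sleep(time.Millisecond) // stand-in for one generation request
	}
}
```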
@@ -31,6 +31,7 @@ import (
 	"github.com/olekukonko/tablewriter"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
+	"golang.org/x/sync/errgroup"
 	"golang.org/x/term"
 
 	"github.com/ollama/ollama/api"
@@ -41,6 +42,7 @@ import (
 	"github.com/ollama/ollama/runner"
 	"github.com/ollama/ollama/server"
 	"github.com/ollama/ollama/types/model"
+	"github.com/ollama/ollama/types/syncmap"
 	"github.com/ollama/ollama/version"
 )
@@ -106,7 +108,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	}
 	spinner.Stop()
 
-	req.Name = args[0]
+	req.Model = args[0]
 	quantize, _ := cmd.Flags().GetString("quantize")
 	if quantize != "" {
 		req.Quantize = quantize
@@ -117,34 +119,54 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}
 
-	if len(req.Files) > 0 {
-		fileMap := map[string]string{}
-		for f, digest := range req.Files {
+	var g errgroup.Group
+	g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1))
+
+	files := syncmap.NewSyncMap[string, string]()
+	for f, digest := range req.Files {
+		g.Go(func() error {
 			if _, err := createBlob(cmd, client, f, digest, p); err != nil {
 				return err
 			}
-			fileMap[filepath.Base(f)] = digest
-		}
-		req.Files = fileMap
-	}
 
-	if len(req.Adapters) > 0 {
-		fileMap := map[string]string{}
-		for f, digest := range req.Adapters {
+			// TODO: this is incorrect since the file might be in a subdirectory
+			// instead this should take the path relative to the model directory
+			// but the current implementation does not allow this
+			files.Store(filepath.Base(f), digest)
+			return nil
+		})
+	}
+
+	adapters := syncmap.NewSyncMap[string, string]()
+	for f, digest := range req.Adapters {
+		g.Go(func() error {
 			if _, err := createBlob(cmd, client, f, digest, p); err != nil {
 				return err
 			}
-			fileMap[filepath.Base(f)] = digest
-		}
-		req.Adapters = fileMap
+
+			// TODO: same here
+			adapters.Store(filepath.Base(f), digest)
+			return nil
+		})
 	}
 
+	if err := g.Wait(); err != nil {
+		return err
+	}
+	req.Files = files.Items()
+	req.Adapters = adapters.Items()
+
 	bars := make(map[string]*progress.Bar)
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			bar, ok := bars[resp.Digest]
 			if !ok {
-				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				msg := resp.Status
+				if msg == "" {
+					msg = fmt.Sprintf("pulling %s...", resp.Digest[7:19])
+				}
+				bar = progress.NewBar(msg, resp.Total, resp.Completed)
 				bars[resp.Digest] = bar
 				p.Add(resp.Digest, bar)
 			}
@@ -213,7 +235,7 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, digest stri
 		}
 	}()
 
-	if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
+	if err := client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
 		return "", err
 	}
 	return digest, nil
@@ -1407,7 +1429,6 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_LLM_LIBRARY"],
 				envVars["OLLAMA_GPU_OVERHEAD"],
 				envVars["OLLAMA_LOAD_TIMEOUT"],
-				envVars["OLLAMA_CONTEXT_LENGTH"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
......
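The `CreateHandler` hunk above replaces a sequential blob-upload loop with bounded fan-out: an `errgroup` runs uploads in parallel, capped just below `GOMAXPROCS`, while a synchronized generic map collects the results. A self-contained sketch of the same pattern, assuming a toy `SyncMap` whose `Store`/`Items` shape mirrors what `types/syncmap` appears to provide:

```go
package main

import (
	"fmt"
	"runtime"
	"sync"

	"golang.org/x/sync/errgroup"
)

// SyncMap is a minimal stand-in for types/syncmap: a mutex-guarded
// generic map that is safe for concurrent Store calls.
type SyncMap[K comparable, V any] struct {
	mu sync.Mutex
	m  map[K]V
}

func NewSyncMap[K comparable, V any]() *SyncMap[K, V] {
	return &SyncMap[K, V]{m: make(map[K]V)}
}

func (s *SyncMap[K, V]) Store(k K, v V) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.m[k] = v
}

func (s *SyncMap[K, V]) Items() map[K]V {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make(map[K]V, len(s.m))
	for k, v := range s.m {
		out[k] = v
	}
	return out
}

func main() {
	inputs := map[string]string{"a.gguf": "sha256-aaa", "b.gguf": "sha256-bbb"}

	var g errgroup.Group
	g.SetLimit(max(runtime.GOMAXPROCS(0)-1, 1)) // leave one CPU for the progress UI

	results := NewSyncMap[string, string]()
	for f, digest := range inputs {
		g.Go(func() error { // Go 1.22+: loop vars are per-iteration, safe to capture
			// the real code uploads f here; we just record the result
			results.Store(f, digest)
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		panic(err)
	}
	fmt.Println(results.Items())
}
```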
@@ -2,7 +2,6 @@ package cmd
 
 import (
 	"bytes"
-	"context"
 	"encoding/json"
 	"io"
 	"net/http"
@@ -337,7 +336,7 @@ func TestDeleteHandler(t *testing.T) {
 	t.Cleanup(mockServer.Close)
 
 	cmd := &cobra.Command{}
-	cmd.SetContext(context.TODO())
+	cmd.SetContext(t.Context())
 	if err := DeleteHandler(cmd, []string{"test-model"}); err != nil {
 		t.Fatalf("DeleteHandler failed: %v", err)
 	}
@@ -399,11 +398,6 @@ func TestGetModelfileName(t *testing.T) {
 			var expectedFilename string
 
 			if tt.fileExists {
-				tempDir, err := os.MkdirTemp("", "modelfiledir")
-				defer os.RemoveAll(tempDir)
-				if err != nil {
-					t.Fatalf("temp modelfile dir creation failed: %v", err)
-				}
 				var fn string
 				if tt.modelfileName != "" {
 					fn = tt.modelfileName
@@ -411,7 +405,7 @@ func TestGetModelfileName(t *testing.T) {
 					fn = "Modelfile"
 				}
 
-				tempFile, err := os.CreateTemp(tempDir, fn)
+				tempFile, err := os.CreateTemp(t.TempDir(), fn)
 				if err != nil {
 					t.Fatalf("temp modelfile creation failed: %v", err)
 				}
@@ -530,7 +524,7 @@ func TestPushHandler(t *testing.T) {
 			cmd := &cobra.Command{}
 			cmd.Flags().Bool("insecure", false, "")
-			cmd.SetContext(context.TODO())
+			cmd.SetContext(t.Context())
 
 			// Redirect stderr to capture progress output
 			oldStderr := os.Stderr
@@ -635,7 +629,7 @@ func TestListHandler(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", mockServer.URL)
 
 			cmd := &cobra.Command{}
-			cmd.SetContext(context.TODO())
+			cmd.SetContext(t.Context())
 
 			// Capture stdout
 			oldStdout := os.Stdout
@@ -690,7 +684,7 @@ func TestCreateHandler(t *testing.T) {
 			return
 		}
 
-		if req.Name != "test-model" {
+		if req.Model != "test-model" {
 			t.Errorf("expected model name 'test-model', got %s", req.Name)
 		}
@@ -730,7 +724,7 @@ func TestCreateHandler(t *testing.T) {
 	}))
 	t.Setenv("OLLAMA_HOST", mockServer.URL)
 	t.Cleanup(mockServer.Close)
-	tempFile, err := os.CreateTemp("", "modelfile")
+	tempFile, err := os.CreateTemp(t.TempDir(), "modelfile")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -750,7 +744,7 @@ func TestCreateHandler(t *testing.T) {
 	}
 
 	cmd.Flags().Bool("insecure", false, "")
-	cmd.SetContext(context.TODO())
+	cmd.SetContext(t.Context())
 
 	// Redirect stderr to capture progress output
 	oldStderr := os.Stderr
......
@@ -4,9 +4,9 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"io"
 	"io/fs"
 	"log/slog"
+	"os"
 	"slices"
 	"strings"
@@ -89,7 +89,7 @@ type ModelConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) ggml.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []ggml.Tensor
+	Tensors([]Tensor) []*ggml.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
@@ -106,13 +106,13 @@ type AdapterConverter interface {
 	// KV maps parameters to LLM key-values
 	KV(ggml.KV) ggml.KV
 	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
-	Tensors([]Tensor) []ggml.Tensor
+	Tensors([]Tensor) []*ggml.Tensor
 	// Replacements returns a list of string pairs to replace in tensor names.
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 }
 
-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
+func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
 	bts, err := fs.ReadFile(fsys, "adapter_config.json")
 	if err != nil {
 		return err
@@ -147,14 +147,14 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
 		return err
 	}
 
-	return writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+	return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
 }
 
 // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
 // and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
+func ConvertModel(fsys fs.FS, f *os.File) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
 		return err
@@ -239,13 +239,13 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 		return err
 	}
 
-	return writeFile(ws, conv.KV(t), conv.Tensors(ts))
+	return writeFile(f, conv.KV(t), conv.Tensors(ts))
 }
 
-func writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
+func writeFile(f *os.File, kv ggml.KV, ts []*ggml.Tensor) error {
 	for i := range ts {
 		ts[i].Shape = slices.Clone(ts[i].Shape)
 		slices.Reverse(ts[i].Shape)
 	}
-	return ggml.WriteGGUF(ws, kv, ts)
+	return ggml.WriteGGUF(f, kv, ts)
 }
@@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }
 
-func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *bertModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 	for _, t := range ts {
 		if slices.Contains([]string{
 			"embeddings.position_ids",
@@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
 			continue
 		}
 
-		out = append(out, ggml.Tensor{
+		out = append(out, &ggml.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
......
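The bert converter above and every converter hunk below make the same mechanical change: `Tensors` now returns `[]*ggml.Tensor` and appends `&ggml.Tensor{...}`. With pointer elements, `append` no longer copies each struct, and every holder of a `*Tensor` sees later mutations such as `writeFile`'s in-place shape reversal; presumably the `*os.File`-based `ggml.WriteGGUF` relies on that sharing. A minimal sketch of the semantics, with a trimmed stand-in for `ggml.Tensor`:

```go
package main

import (
	"fmt"
	"slices"
)

// Tensor is a cut-down stand-in for ggml.Tensor.
type Tensor struct {
	Name  string
	Shape []uint64
}

func main() {
	src := Tensor{Name: "blk.0.attn_q.weight", Shape: []uint64{4096, 4096, 1}}

	// With []*Tensor, the slice element and src are the same value,
	// so mutations made while writing are visible everywhere.
	ts := []*Tensor{&src}
	for i := range ts {
		ts[i].Shape = slices.Clone(ts[i].Shape) // as writeFile does
		slices.Reverse(ts[i].Shape)             // GGUF stores dims reversed
	}
	fmt.Println(src.Shape) // [1 4096 4096]: the original value saw the change
}
```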
@@ -43,10 +43,10 @@ func (p *commandrModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }
 
-func (p *commandrModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *commandrModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 	for _, t := range ts {
-		out = append(out, ggml.Tensor{
+		out = append(out, &ggml.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
@@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }
 
-func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *gemmaModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 	for _, t := range ts {
 		if !strings.HasPrefix(t.Name(), "v.") && strings.HasSuffix(t.Name(), "_norm.weight") {
 			t.SetRepacker(p.addOne)
 		}
 
-		out = append(out, ggml.Tensor{
+		out = append(out, &ggml.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
@@ -21,8 +21,8 @@ func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
 	return kv
 }
 
-func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *gemma2Adapter) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 	for _, t := range ts {
 		shape := t.Shape()
 		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack)
 		}
 
-		out = append(out, ggml.Tensor{
+		out = append(out, &ggml.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: t.Shape(),
......
@@ -126,11 +126,11 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
 	return kv
 }
 
-func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *llamaModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 
 	if p.RopeScaling.factors != nil {
-		out = append(out, ggml.Tensor{
+		out = append(out, &ggml.Tensor{
 			Name:  "rope_freqs.weight",
 			Kind:  0,
 			Shape: []uint64{uint64(len(p.RopeScaling.factors))},
@@ -145,7 +145,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
 		}
 	}
 
-	out = append(out, ggml.Tensor{
+	out = append(out, &ggml.Tensor{
 		Name:  t.Name(),
 		Kind:  t.Kind(),
 		Shape: t.Shape(),
......
@@ -88,13 +88,13 @@ func (p *llama4Model) Replacements() []string {
 }
 
 // Tensors implements ModelConverter.
-func (p *llama4Model) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *llama4Model) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 
 	var textTensors []Tensor
 	for _, t := range ts {
 		if strings.HasPrefix(t.Name(), "v.") || strings.HasPrefix(t.Name(), "mm.") {
-			out = append(out, ggml.Tensor{
+			out = append(out, &ggml.Tensor{
 				Name:  t.Name(),
 				Kind:  t.Kind(),
 				Shape: t.Shape(),
@@ -112,7 +112,7 @@ func (p *llama4Model) Tensors(ts []Tensor) []ggml.Tensor {
 			// clone tensor since we need separate repackers
 			tt := t.Clone()
 			tt.SetRepacker(p.repack(nil, nil, tensor.S(i*halfDim, (i+1)*halfDim)))
-			out = append(out, ggml.Tensor{
+			out = append(out, &ggml.Tensor{
 				Name:  strings.ReplaceAll(tt.Name(), "ffn_gate_up_exps", name),
 				Kind:  tt.Kind(),
 				Shape: newShape,
@@ -125,7 +125,7 @@ func (p *llama4Model) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack())
 			newShape := slices.Clone(t.Shape())
 			newShape[1], newShape[2] = newShape[2], newShape[1]
-			out = append(out, ggml.Tensor{
+			out = append(out, &ggml.Tensor{
 				Name:  t.Name(),
 				Kind:  t.Kind(),
 				Shape: newShape,
......
@@ -29,8 +29,8 @@ func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
 	return kv
 }
 
-func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
-	var out []ggml.Tensor
+func (p *llamaAdapter) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
 	for _, t := range ts {
 		shape := t.Shape()
 		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -41,7 +41,7 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack)
 		}
 
-		out = append(out, ggml.Tensor{
+		out = append(out, &ggml.Tensor{
 			Name:  t.Name(),
 			Kind:  t.Kind(),
 			Shape: shape,
......