Unverified Commit b754f5a6 authored by Daniel Hiltgen, committed by GitHub

Remove submodule and shift to Go server - 0.4.0 (#7157)

* Remove llama.cpp submodule and shift new build to top

* CI: install msys and clang gcc on win

Needed for DeepSeek to work properly on Windows
parent a805e594
...@@ -3,9 +3,7 @@ ollama ...@@ -3,9 +3,7 @@ ollama
app app
macapp macapp
dist dist
llm/llama.cpp
.env .env
.cache .cache
test_data test_data
llm/build
llama/build llama/build
llm/ext_server/* linguist-vendored
llama/**/*.cpp linguist-vendored llama/**/*.cpp linguist-vendored
llama/**/*.hpp linguist-vendored llama/**/*.hpp linguist-vendored
llama/**/*.h linguist-vendored llama/**/*.h linguist-vendored
......
...@@ -48,8 +48,8 @@ jobs: ...@@ -48,8 +48,8 @@ jobs:
with: with:
name: dist-darwin name: dist-darwin
path: | path: |
dist/*arwin* dist/Ollama-darwin.zip
!dist/*-cov dist/ollama-darwin
# Windows builds take a long time to both install the dependencies and build, so parallelize # Windows builds take a long time to both install the dependencies and build, so parallelize
# CPU generation step # CPU generation step
...@@ -85,6 +85,24 @@ jobs: ...@@ -85,6 +85,24 @@ jobs:
write-host "Installing plugin" write-host "Installing plugin"
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
write-host "plugin installed" write-host "plugin installed"
- name: Install msys2
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
run: |
get-command gcc
gcc --version
get-command make
make --version
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version-file: go.mod go-version-file: go.mod
...@@ -92,19 +110,19 @@ jobs: ...@@ -92,19 +110,19 @@ jobs:
- run: go get ./... - run: go get ./...
- run: | - run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH" $env:PATH="$gopath;$env:PATH"
go generate -x ./... $cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
name: go generate make -j $cores
name: make
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: generate-windows-cpu name: generate-windows-cpu
path: | path: |
build/**/* build/**/*
build/**/*.a build/**/*.a
llm/build/**/*.a
dist/windows-amd64/** dist/windows-amd64/**
# ROCm generation step # ROCm generation step
...@@ -140,6 +158,24 @@ jobs: ...@@ -140,6 +158,24 @@ jobs:
write-host "Installing plugin" write-host "Installing plugin"
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
write-host "plugin installed" write-host "plugin installed"
- name: Install msys2
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
run: |
get-command gcc
gcc --version
get-command make
make --version
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version-file: go.mod go-version-file: go.mod
...@@ -158,31 +194,21 @@ jobs: ...@@ -158,31 +194,21 @@ jobs:
- run: go get ./... - run: go get ./...
- run: | - run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH" $env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_CPU_GENERATE="1" $env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
go generate -x ./... $cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
name: go generate make -j $cores
- name: 'gather rocm dependencies' name: make
run: |
$HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
md "dist\deps\bin\rocblas\library"
cp "${HIP_PATH}\bin\hipblas.dll" "dist\deps\bin\"
cp "${HIP_PATH}\bin\rocblas.dll" "dist\deps\bin\"
cp "${HIP_PATH}\bin\rocblas\library\*" "dist\deps\bin\rocblas\library\"
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: generate-windows-rocm name: generate-windows-rocm
path: | path: |
build/**/* build/**/*
dist/windows-amd64/** dist/windows-amd64/**
- uses: actions/upload-artifact@v4
with:
name: windows-rocm-deps
path: dist/deps/*
# CUDA generation step # CUDA generation step
generate-windows-cuda: generate-windows-cuda:
...@@ -224,6 +250,24 @@ jobs: ...@@ -224,6 +250,24 @@ jobs:
write-host "Installing plugin" write-host "Installing plugin"
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
write-host "plugin installed" write-host "plugin installed"
- name: Install msys2
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
run: |
get-command gcc
gcc --version
get-command make
make --version
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version-file: go.mod go-version-file: go.mod
...@@ -245,34 +289,23 @@ jobs: ...@@ -245,34 +289,23 @@ jobs:
- name: 'Verify CUDA' - name: 'Verify CUDA'
run: nvcc -V run: nvcc -V
- run: go get ./... - run: go get ./...
- name: go generate - name: make
run: | run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
$cudabin=(get-command nvcc).source | split-path $cudabin=(get-command nvcc).source | split-path
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$cudabin;$env:PATH" $env:PATH="$gopath;$cudabin;$env:PATH"
$env:OLLAMA_SKIP_CPU_GENERATE="1" $env:OLLAMA_SKIP_CPU_GENERATE="1"
go generate -x ./... $cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
- name: 'gather cuda dependencies' make -j $cores
run: |
$NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
md "dist\deps"
cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: generate-windows-cuda-${{ matrix.cuda.version }} name: generate-windows-cuda-${{ matrix.cuda.version }}
path: | path: |
build/**/* build/**/*
dist/windows-amd64/** dist/windows-amd64/**
- uses: actions/upload-artifact@v4
with:
name: windows-cuda-deps-${{ matrix.cuda.version }}
path: dist/deps/*
# windows arm64 generate, go build, and zip file (no installer) # windows arm64 generate, go build, and zip file (no installer)
# Output of this build is aggregated into the final x86 build # Output of this build is aggregated into the final x86 build
...@@ -292,6 +325,30 @@ jobs: ...@@ -292,6 +325,30 @@ jobs:
choco install -y --no-progress git gzip choco install -y --no-progress git gzip
echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
# pacman is buggy on win arm64, so we avoid using it and rely on the binary artifacts instead
# we download the sfx (7-Zip self-extracting bundle), which isn't a fully set-up environment, but the binaries we need for the build work
- name: Install msys2 x64
run: |
$url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-base-x86_64-20240727.sfx.exe"
write-host "Downloading MSYS2"
Invoke-WebRequest -Uri "$url" -outfile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @(
'-y', '-oC:\'
) -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
# since pacman isn't reliable, we just download the tar file and extract directly
- name: Downloading and extracting msys2 make tar file
run: |
$url="https://mirror.msys2.org/msys/x86_64/make-4.4.1-2-x86_64.pkg.tar.zst"
write-host "Downloading make"
Invoke-WebRequest -Uri "$url" -outfile c:\msys64\make.tar.zst
cd c:\msys64; tar -xf make.tar.zst
rm c:\msys64\make.tar.zst
- name: Verify Make works properly
run: |
echo $env:PATH
make --version
- name: Install Visual Studio 2022 - name: Install Visual Studio 2022
run: | run: |
$components = @( $components = @(
...@@ -385,10 +442,9 @@ jobs: ...@@ -385,10 +442,9 @@ jobs:
- run: | - run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
$gccpath=(get-command gcc).source | split-path -parent $gccpath=(get-command gcc).source | split-path -parent
& "C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\Program Files\Microsoft Visual Studio\2022\Community' -skipautomaticlocation
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:PATH="$gopath;$gccpath;$env:PATH"
$env:PATH="$gopath;$gccpath;$env:PATH;C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin"
echo $env:PATH echo $env:PATH
$env:ARCH="arm64" $env:ARCH="arm64"
.\scripts\build_windows.ps1 buildOllama buildApp gatherDependencies distZip .\scripts\build_windows.ps1 buildOllama buildApp gatherDependencies distZip
...@@ -441,6 +497,24 @@ jobs: ...@@ -441,6 +497,24 @@ jobs:
write-host "Installing plugin" write-host "Installing plugin"
& "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
write-host "plugin installed" write-host "plugin installed"
- name: Install msys2
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
run: |
get-command gcc
gcc --version
get-command make
make --version
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version-file: go.mod go-version-file: go.mod
...@@ -455,15 +529,6 @@ jobs: ...@@ -455,15 +529,6 @@ jobs:
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: generate-windows-cuda-12 name: generate-windows-cuda-12
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps-11
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps-12
- uses: actions/download-artifact@v4
with:
name: windows-rocm-deps
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: generate-windows-rocm name: generate-windows-rocm
...@@ -474,11 +539,12 @@ jobs: ...@@ -474,11 +539,12 @@ jobs:
- run: dir build - run: dir build
- run: | - run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH" $env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_GENERATE="1" $env:OLLAMA_SKIP_GENERATE="1"
$env:ARCH="amd64"
& .\scripts\build_windows.ps1 & .\scripts\build_windows.ps1
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
......
...@@ -21,9 +21,6 @@ jobs: ...@@ -21,9 +21,6 @@ jobs:
changes: changes:
runs-on: ubuntu-latest runs-on: ubuntu-latest
outputs: outputs:
GENERATE: ${{ steps.changes.outputs.GENERATE }}
GENERATE_CUDA: ${{ steps.changes.outputs.GENERATE_CUDA }}
GENERATE_ROCM: ${{ steps.changes.outputs.GENERATE_ROCM }}
RUNNERS: ${{ steps.changes.outputs.RUNNERS }} RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -39,53 +36,12 @@ jobs: ...@@ -39,53 +36,12 @@ jobs:
} }
{ {
echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
echo RUNNERS=$(changed 'llama/**') echo RUNNERS=$(changed 'llama/**')
} >>$GITHUB_OUTPUT } >>$GITHUB_OUTPUT
generate: runners-linux-cuda:
needs: [changes] needs: [changes]
if: ${{ needs.changes.outputs.GENERATE == 'True' }} if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64, arm64]
exclude:
- os: ubuntu-latest
arch: arm64
- os: windows-2019
arch: arm64
runs-on: ${{ matrix.os }}
env:
GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1'
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
$gopath=(get-command go).source | split-path -parent
$gccpath=(get-command gcc).source | split-path -parent
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
cd $env:GITHUB_WORKSPACE
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$gccpath;$env:PATH"
echo $env:PATH
go generate -x ./...
if: ${{ startsWith(matrix.os, 'windows-') }}
name: 'Windows Go Generate'
- run: go generate -x ./...
if: ${{ ! startsWith(matrix.os, 'windows-') }}
name: 'Unix Go Generate'
- run: go build .
generate-cuda:
needs: [changes]
if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
strategy: strategy:
matrix: matrix:
cuda-version: cuda-version:
...@@ -95,8 +51,6 @@ jobs: ...@@ -95,8 +51,6 @@ jobs:
steps: steps:
- run: | - run: |
apt-get update && apt-get install -y git build-essential curl apt-get update && apt-get install -y git build-essential curl
curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
| tar -zx -C /usr --strip-components 1
env: env:
DEBIAN_FRONTEND: noninteractive DEBIAN_FRONTEND: noninteractive
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -107,12 +61,11 @@ jobs: ...@@ -107,12 +61,11 @@ jobs:
- run: go get ./... - run: go get ./...
- run: | - run: |
git config --global --add safe.directory /__w/ollama/ollama git config --global --add safe.directory /__w/ollama/ollama
go generate -x ./... cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
env: make -j $cores cuda_v11
OLLAMA_SKIP_CPU_GENERATE: '1' runners-linux-rocm:
generate-rocm:
needs: [changes] needs: [changes]
if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }} if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
strategy: strategy:
matrix: matrix:
rocm-version: rocm-version:
...@@ -122,8 +75,6 @@ jobs: ...@@ -122,8 +75,6 @@ jobs:
steps: steps:
- run: | - run: |
apt-get update && apt-get install -y git build-essential curl rocm-libs apt-get update && apt-get install -y git build-essential curl rocm-libs
curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
| tar -zx -C /usr --strip-components 1
env: env:
DEBIAN_FRONTEND: noninteractive DEBIAN_FRONTEND: noninteractive
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -134,14 +85,13 @@ jobs: ...@@ -134,14 +85,13 @@ jobs:
- run: go get ./... - run: go get ./...
- run: | - run: |
git config --global --add safe.directory /__w/ollama/ollama git config --global --add safe.directory /__w/ollama/ollama
go generate -x ./... cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
env: make -j $cores rocm
OLLAMA_SKIP_CPU_GENERATE: '1'
# ROCm generation step # ROCm generation step
generate-windows-rocm: runners-windows-rocm:
needs: [changes] needs: [changes]
if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }} if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
runs-on: windows runs-on: windows
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -160,24 +110,42 @@ jobs: ...@@ -160,24 +110,42 @@ jobs:
- name: 'Verify ROCm' - name: 'Verify ROCm'
run: | run: |
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
- name: Install msys2
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
run: |
get-command gcc
gcc --version
get-command make
make --version
- run: go get ./... - run: go get ./...
- run: | - run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$env:PATH" $env:PATH="$gopath;$env:PATH"
$env:OLLAMA_SKIP_CPU_GENERATE="1" $env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
go generate -x ./... $cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
name: go generate write-host $env:HIP_PATH
env: make -C llama print-HIP_PATH print-HIP_LIB_DIR
OLLAMA_SKIP_CPU_GENERATE: '1' make -j $cores rocm
name: make
# CUDA generation step # CUDA generation step
generate-windows-cuda: runners-windows-cuda:
needs: [changes] needs: [changes]
if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }} if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
runs-on: windows runs-on: windows
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -201,21 +169,40 @@ jobs: ...@@ -201,21 +169,40 @@ jobs:
echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
- name: 'Verify CUDA' - name: 'Verify CUDA'
run: nvcc -V run: nvcc -V
- name: Install msys2
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
run: |
get-command gcc
gcc --version
get-command make
make --version
- run: go get ./... - run: go get ./...
- name: go generate - name: make
run: | run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
$cudabin=(get-command nvcc).source | split-path $cudabin=(get-command nvcc).source | split-path
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$cudabin;$env:PATH" $env:PATH="$gopath;$cudabin;$env:PATH"
$env:OLLAMA_SKIP_CPU_GENERATE="1" $env:OLLAMA_SKIP_CPU_GENERATE="1"
go generate -x ./... $cores = (Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores
make -j $cores cuda_v11
env: env:
OLLAMA_SKIP_CPU_GENERATE: '1' OLLAMA_SKIP_CPU_GENERATE: '1'
runners: runners-cpu:
needs: [changes] needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }} if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
strategy: strategy:
...@@ -239,20 +226,41 @@ jobs: ...@@ -239,20 +226,41 @@ jobs:
go-version-file: go.mod go-version-file: go.mod
cache: true cache: true
- run: go get ./... - run: go get ./...
- name: Install msys2
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
$msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
write-host "Downloading msys2"
Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
write-host "Installing msys2"
Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: verify tools
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
get-command gcc
gcc --version
get-command make
make --version
- name: 'Build Windows Go Runners' - name: 'Build Windows Go Runners'
if: ${{ startsWith(matrix.os, 'windows-') }} if: ${{ startsWith(matrix.os, 'windows-') }}
run: | run: |
$gopath=(get-command go).source | split-path -parent $gopath=(get-command go).source | split-path -parent
$gccpath=(get-command gcc).source | split-path -parent $gccpath=(get-command gcc).source | split-path -parent
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
cd $env:GITHUB_WORKSPACE Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0" $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$gccpath;$env:PATH" $env:PATH="$gopath;$gccpath;$env:PATH"
echo $env:PATH echo $env:PATH
make -C llama -j 4 make -j 4
- name: 'Build Unix Go Runners' - name: 'Build Unix Go Runners'
if: ${{ ! startsWith(matrix.os, 'windows-') }} if: ${{ ! startsWith(matrix.os, 'windows-') }}
run: make -C llama -j 4 run: make -j 4
- run: go build . - run: go build .
lint: lint:
...@@ -302,9 +310,6 @@ jobs: ...@@ -302,9 +310,6 @@ jobs:
env: env:
GOARCH: ${{ matrix.arch }} GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1' CGO_ENABLED: '1'
OLLAMA_CPU_TARGET: 'static'
OLLAMA_SKIP_CPU_GENERATE: '1'
OLLAMA_SKIP_METAL_GENERATE: '1'
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
...@@ -319,7 +324,6 @@ jobs: ...@@ -319,7 +324,6 @@ jobs:
arm64) echo ARCH=arm64 ;; arm64) echo ARCH=arm64 ;;
esac >>$GITHUB_ENV esac >>$GITHUB_ENV
shell: bash shell: bash
- run: go generate ./...
- run: go build - run: go build
- run: go test -v ./... - run: go test -v ./...
...@@ -333,4 +337,4 @@ jobs: ...@@ -333,4 +337,4 @@ jobs:
submodules: recursive submodules: recursive
- name: Verify patches carry all the changes - name: Verify patches carry all the changes
run: | run: |
cd llama && make apply-patches sync && git diff --compact-summary --exit-code . make apply-patches sync && git diff --compact-summary --exit-code llama
\ No newline at end of file \ No newline at end of file
[submodule "llama.cpp"]
path = llm/llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
shallow = true
\ No newline at end of file
...@@ -6,168 +6,134 @@ ARG CUDA_VERSION_12=12.4.0 ...@@ -6,168 +6,134 @@ ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2 ARG ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
FROM scratch AS llm-code #
COPY .git .git # docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
COPY .gitmodules .gitmodules # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
COPY llm llm #
### Then incremental builds will be much faster in this container
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64 #
ARG CMAKE_VERSION # make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
COPY ./scripts/rh_linux_deps.sh / #
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V11_ARCHITECTURES
ENV GOARCH=amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ARG CGO_CFLAGS RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
ARG CUDA_V12_ARCHITECTURES dnf clean all && \
ENV GOARCH=amd64 dnf install -y \
RUN --mount=type=cache,target=/root/.ccache \ zsh \
OLLAMA_SKIP_STATIC_GENERATE=1 \ cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
OLLAMA_SKIP_CPU_GENERATE=1 \ cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \ # TODO intel oneapi goes here...
CUDA_VARIANT="_v12" \ ENV GOARCH amd64
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ ENV CGO_ENABLED 1
bash gen_linux.sh WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-runner-arm64
### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants
#
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ dnf config-manager --set-enabled appstream && \
WORKDIR /go/src/github.com/ollama/ollama/llm/generate dnf clean all && \
ARG CGO_CFLAGS dnf install -y \
zsh \
cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]
FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_CUDA_11_GENERATE
ARG OLLAMA_SKIP_CUDA_12_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG CUDA_V11_ARCHITECTURES ARG CUDA_V11_ARCHITECTURES
ENV GOARCH=arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-runner-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES ARG CUDA_V12_ARCHITECTURES
ENV GOARCH=arm64 ARG OLLAMA_FAST_BUILD
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \ if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
OLLAMA_SKIP_CPU_GENERATE=1 \ make -C llama -j $(expr $(nproc) / 2 ) ; \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \ else \
CUDA_VARIANT="_v12" \ make -C llama -j 5 ; \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ fi
bash gen_linux.sh
FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 COPY . .
ARG CMAKE_VERSION ARG OLLAMA_SKIP_CUDA_GENERATE
COPY ./scripts/rh_linux_deps.sh / ARG OLLAMA_SKIP_CUDA_11_GENERATE
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh ARG OLLAMA_SKIP_CUDA_12_GENERATE
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH ARG CUDA_V11_ARCHITECTURES
ENV LIBRARY_PATH=/opt/amdgpu/lib64 ARG CUDA_V12_ARCHITECTURES
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ARG OLLAMA_FAST_BUILD
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG AMDGPU_TARGETS
ENV GOARCH=amd64
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh make -C llama -j 8
RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
ENV GOARCH=amd64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64 # Intermediate stages used for ./scripts/build_linux.sh
RUN --mount=type=cache,target=/root/.ccache \ FROM --platform=linux/amd64 centos:7 AS builder-amd64
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ENV CGO_ENABLED 1
ARG OLLAMA_CUSTOM_CPU_DEFS ENV GOARCH amd64
ARG CGO_CFLAGS
ENV GOARCH=arm64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
# Intermediate stages used for ./scripts/build_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED=1
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
FROM --platform=linux/amd64 builder-amd64 AS build-amd64
COPY . . COPY . .
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
ARG OLLAMA_SKIP_ROCM_GENERATE
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama . go build -trimpath -o dist/linux-amd64/bin/ollama .
RUN cd dist/linux-$GOARCH && \ RUN cd dist/linux-$GOARCH && \
tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN cd dist/linux-$GOARCH-rocm && \ RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz cd dist/linux-$GOARCH-rocm && \
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
fi
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
ENV CGO_ENABLED=1 ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
ENV CGO_ENABLED 1
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
FROM --platform=linux/arm64 builder-arm64 AS build-arm64
COPY . . COPY . .
COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/ COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
...@@ -179,11 +145,11 @@ FROM --platform=linux/amd64 scratch AS dist-amd64 ...@@ -179,11 +145,11 @@ FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64 FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM dist-$TARGETARCH as dist FROM dist-$TARGETARCH AS dist
# Optimized container images do not carry nested payloads # Optimized container images do not carry nested payloads
FROM --platform=linux/amd64 cpu-builder-amd64 AS container-build-amd64 FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
COPY . . COPY . .
ARG GOFLAGS ARG GOFLAGS
...@@ -191,7 +157,7 @@ ARG CGO_CFLAGS ...@@ -191,7 +157,7 @@ ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama . go build -trimpath -o dist/linux-amd64/bin/ollama .
FROM --platform=linux/arm64 cpu-builder-arm64 AS container-build-arm64 FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
COPY . . COPY . .
ARG GOFLAGS ARG GOFLAGS
...@@ -199,48 +165,52 @@ ARG CGO_CFLAGS ...@@ -199,48 +165,52 @@ ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \ RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama . go build -trimpath -o dist/linux-arm64/bin/ollama .
# For amd64 container images, filter out cuda/rocm to minimize size
FROM runners-amd64 AS runners-cuda-amd64
RUN rm -rf \
./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
./dist/linux-amd64/lib/ollama/runners/rocm*
FROM runners-amd64 AS runners-rocm-amd64
RUN rm -rf \
./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
./dist/linux-amd64/lib/ollama/libcu*.so* \
./dist/linux-amd64/lib/ollama/runners/cuda*
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64 FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y ca-certificates && \ apt-get install -y ca-certificates && \
apt-get clean && rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y ca-certificates && \ apt-get install -y ca-certificates && \
apt-get clean && rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
# ROCm libraries are larger so we keep them distinct from the CPU/CUDA image # ROCm libraries are larger so we keep them distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer # Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
# across releases # across releases
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/ COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y ca-certificates && \ apt-get install -y ca-certificates && \
apt-get clean && rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
EXPOSE 11434 EXPOSE 11434
ENV OLLAMA_HOST=0.0.0.0 ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"] ENTRYPOINT ["/bin/ollama"]
CMD ["serve"] CMD ["serve"]
FROM runtime-$TARGETARCH FROM runtime-$TARGETARCH
EXPOSE 11434 EXPOSE 11434
ENV OLLAMA_HOST=0.0.0.0 ENV OLLAMA_HOST 0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
......
GOALS := $(or $(MAKECMDGOALS),all)
.PHONY: $(GOALS)
$(GOALS):
$(MAKE) -C llama $@
\ No newline at end of file
# Development # Development
> [!IMPORTANT]
> The `llm` package that loads and runs models is being updated to use a new [Go runner](#transition-to-go-runner): this should only impact a small set of PRs; however, it does change how the project is built.
Install required tools:
- cmake version 3.24 or higher
- go version 1.22 or higher
- gcc version 11.4.0 or higher
### MacOS
```bash
brew install go cmake gcc
```
Optionally enable debugging and more verbose logging:
```bash
# At build time
export CGO_CFLAGS="-g"
# At runtime
export OLLAMA_DEBUG=1
```
Get the required libraries and build the native LLM code:
```bash
go generate ./...
```
Then build ollama:
```bash
go build .
```
Now you can run `ollama`:
```bash
./ollama
```
### Linux
#### Linux CUDA (NVIDIA)
_Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
Install `cmake` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
development and runtime packages.
Typically the build scripts will auto-detect CUDA; however, if your Linux distro
or installation approach uses unusual paths, you can point the build at the right
locations by setting the environment variable `CUDA_LIB_DIR` to the directory
containing the shared libraries and `CUDACXX` to the location of the nvcc compiler.
You can also customize the set of target CUDA architectures by setting
`CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70"), as in the sketch below.
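For example, a hedged sketch assuming CUDA was installed under a non-standard prefix such as `/opt/cuda` (adjust the paths for your system):

```
export CUDA_LIB_DIR=/opt/cuda/lib64        # directory containing the CUDA shared libraries
export CUDACXX=/opt/cuda/bin/nvcc          # path to the nvcc compiler
export CMAKE_CUDA_ARCHITECTURES="50;60;70" # optional: restrict the target architectures
```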
Then generate dependencies:
```
go generate ./...
```
Then build the binary:
```
go build .
```
#### Linux ROCm (AMD)
_Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_
Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/) development packages first, as well as `cmake` and `golang`.
Typically the build scripts will auto-detect ROCm; however, if your Linux distro
or installation approach uses unusual paths, you can point the build at the right
locations by setting the environment variable `ROCM_PATH` to the ROCm
install (typically `/opt/rocm`) and `CLBlast_DIR` to the location of the
CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
the AMD GPU targets by setting `AMDGPU_TARGETS` (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`), as in the sketch below.
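For example, a sketch assuming ROCm lives in the default `/opt/rocm` location and CLBlast's CMake config is under `/usr/lib/cmake/CLBlast` (adjust as needed):

```
export ROCM_PATH=/opt/rocm                 # ROCm install root
export CLBlast_DIR=/usr/lib/cmake/CLBlast  # directory containing the CLBlast CMake config
export AMDGPU_TARGETS="gfx1101;gfx1102"    # optional: restrict the GPU targets
```

Then generate dependencies: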
```
go generate ./...
```
Then build the binary:
```
go build .
```
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
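For example, on most distros that use the `render` group (a sketch; the group name can vary, and you will need to log out and back in for it to take effect):

```
sudo usermod -aG render $USER
```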
#### Advanced CPU Settings
By default, running `go generate ./...` will compile a few different variations
of the LLM library based on common CPU families and vector math capabilities,
including a lowest-common-denominator build which should run on almost any 64-bit
CPU, albeit somewhat slowly. At runtime, Ollama will auto-detect the optimal
variation to load. If you would like a CPU-based build customized for your
processor, you can set `OLLAMA_CUSTOM_CPU_DEFS` to the llama.cpp flags you would
like to use. For example, to compile an optimized binary for an Intel i9-9880H,
you might use:
```
OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./...
go build .
```
#### Containerized Linux Build
If you have Docker available, you can build Linux binaries with `./scripts/build_linux.sh`, which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`.
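For faster incremental rebuilds you can also use the unified builder image described in the Dockerfile comments (a sketch assuming an amd64 host):

```
docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
```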
### Windows
Note: The Windows build for Ollama is still under development.
First, install required tools:
- MSVC toolchain - C/C++ and cmake as minimal requirements
- Go version 1.22 or higher
- MinGW (pick one variant) with GCC.
- [MinGW-w64](https://www.mingw-w64.org/)
- [MSYS2](https://www.msys2.org/)
- The `ThreadJob` PowerShell module: `Install-Module -Name ThreadJob -Scope CurrentUser`
Then, build the `ollama` binary:
```powershell
$env:CGO_ENABLED="1"
go generate ./...
go build .
```
#### Windows CUDA (NVIDIA)
In addition to the common Windows development tools described above, install CUDA after installing MSVC.
- [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
#### Windows ROCm (AMD Radeon)
In addition to the common Windows development tools described above, install AMD's HIP package after installing MSVC.
- [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
- [Strawberry Perl](https://strawberryperl.com/)
Lastly, add the `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
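For example, to add it to the current PowerShell session (a sketch; adjust for your Visual Studio edition and install path):

```powershell
$env:PATH += ";C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja"
```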
#### Windows arm64
The default `Developer PowerShell for VS 2022` may default to x86, which is not what you want. To ensure you get an arm64 development environment, start a plain PowerShell terminal and run:
```powershell
import-module 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\Tools\\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\\Program Files\\Microsoft Visual Studio\\2022\\Community' -skipautomaticlocation
```
You can confirm with `write-host $env:VSCMD_ARG_TGT_ARCH`
Follow the instructions at https://www.msys2.org/wiki/arm64/ to set up an arm64 msys2 environment. Ollama requires gcc and mingw32-make to compile, which are not currently available on Windows arm64, but a gcc compatibility adapter is available via `mingw-w64-clang-aarch64-gcc-compat`. At a minimum you will need to install the following:
```
pacman -S mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-gcc-compat mingw-w64-clang-aarch64-make make
```
You will need to ensure your PATH includes go, cmake, gcc, and clang's mingw32-make (typically `C:\msys64\clangarm64\bin\`) to build ollama from source, as in the sketch below.
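For example, to prepend the msys2 clangarm64 tools for the current session (a sketch; adjust if you installed msys2 elsewhere):

```powershell
$env:PATH = "C:\msys64\clangarm64\bin;$env:PATH"
```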
## Transition to Go runner
The Ollama team is working on moving to a new Go-based runner that loads and runs models in a subprocess, replacing the previous code under `ext_server`. During this transition period, the new Go runner is "opt in" at build time and requires a different build approach.
After the transition to use the Go server exclusively, both `make` and `go generate` will build the Go runner.
Install required tools: Install required tools:
- go version 1.22 or higher - go version 1.22 or higher
...@@ -201,7 +23,7 @@ export OLLAMA_DEBUG=1 ...@@ -201,7 +23,7 @@ export OLLAMA_DEBUG=1
Get the required libraries and build the native LLM code: (Adjust the job count based on your number of processors for a faster build) Get the required libraries and build the native LLM code: (Adjust the job count based on your number of processors for a faster build)
```bash ```bash
make -C llama -j 5 make -j 5
``` ```
Then build ollama: Then build ollama:
...@@ -238,7 +60,7 @@ a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. " ...@@ -238,7 +60,7 @@ a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "
Then generate dependencies: (Adjust the job count based on your number of processors for a faster build) Then generate dependencies: (Adjust the job count based on your number of processors for a faster build)
``` ```
make -C llama -j 5 make -j 5
``` ```
Then build the binary: Then build the binary:
...@@ -263,7 +85,7 @@ the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx ...@@ -263,7 +85,7 @@ the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx
Then generate dependencies: (Adjust the job count based on your number of processors for a faster build) Then generate dependencies: (Adjust the job count based on your number of processors for a faster build)
``` ```
make -C llama -j 5 make -j 5
``` ```
Then build the binary: Then build the binary:
...@@ -308,7 +130,7 @@ Then, build the `ollama` binary: ...@@ -308,7 +130,7 @@ Then, build the `ollama` binary:
```powershell ```powershell
$env:CGO_ENABLED="1" $env:CGO_ENABLED="1"
make -C llama -j 8 make -j 8
go build . go build .
``` ```
......
# Note: once we have fully transitioned to the Go server, this will replace the old Dockerfile at the top of the tree
ARG GOLANG_VERSION=1.22.8
ARG CMAKE_VERSION=3.22.1
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2
### To create a local image for building linux binaries on mac or windows with efficient incremental builds
#
# docker build --platform linux/amd64 -t builder-amd64 -f llama/Dockerfile --target unified-builder-amd64 .
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
#
### Then incremental builds will be much faster in this container
#
# make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
#
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
dnf clean all && \
dnf install -y \
zsh \
cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
# TODO intel oneapi goes here...
ENV GOARCH amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]
### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants
#
# docker build --platform linux/arm64 -t builder-arm64 -f llama/Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
dnf config-manager --set-enabled appstream && \
dnf clean all && \
dnf install -y \
zsh \
cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH arm64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]
FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_CUDA_11_GENERATE
ARG OLLAMA_SKIP_CUDA_12_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG CUDA_V11_ARCHITECTURES
ARG CUDA_V12_ARCHITECTURES
ARG OLLAMA_FAST_BUILD
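# Build the native runners: use half the available cores on AVX-capable hosts, otherwise a fixed job count of 5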
RUN --mount=type=cache,target=/root/.ccache \
if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
make -C llama -j $(expr $(nproc) / 2 ) ; \
else \
make -C llama -j 5 ; \
fi
FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_CUDA_11_GENERATE
ARG OLLAMA_SKIP_CUDA_12_GENERATE
ARG CUDA_V11_ARCHITECTURES
ARG CUDA_V12_ARCHITECTURES
ARG OLLAMA_FAST_BUILD
RUN --mount=type=cache,target=/root/.ccache \
make -C llama -j 8
# Intermediate stages used for ./scripts/build_linux.sh
FROM --platform=linux/amd64 centos:7 AS builder-amd64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV CGO_ENABLED 1
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama
FROM --platform=linux/amd64 builder-amd64 AS build-amd64
COPY . .
COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS
ARG CGO_CFLAGS
ARG OLLAMA_SKIP_ROCM_GENERATE
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
RUN cd dist/linux-$GOARCH && \
tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
cd dist/linux-$GOARCH-rocm && \
tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
fi
FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
ARG CMAKE_VERSION
ARG GOLANG_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
ENV CGO_ENABLED 1
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama
FROM --platform=linux/arm64 builder-arm64 AS build-arm64
COPY . .
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
RUN cd dist/linux-$GOARCH && \
tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM dist-$TARGETARCH AS dist
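# The dist stage above is a scratch image holding only the release tarballs. One way to pull them out
# locally is BuildKit's local exporter (a sketch; see ./scripts/build_linux.sh for the project's own flow):
#   docker buildx build --platform linux/amd64 -f llama/Dockerfile --target dist --output type=local,dest=dist .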
# Optimized container images do not carry nested payloads
FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# For amd64 container images, filter out cuda/rocm to minimize size
FROM runners-amd64 AS runners-cuda-amd64
RUN rm -rf \
./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
./dist/linux-amd64/lib/ollama/runners/rocm*
FROM runners-amd64 AS runners-rocm-amd64
RUN rm -rf \
./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
./dist/linux-amd64/lib/ollama/libcu*.so* \
./dist/linux-amd64/lib/ollama/runners/cuda*
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
RUN apt-get update && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
RUN apt-get update && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
# ROCm libraries are larger, so we keep them distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Frontload the ROCm libraries, which are large and rarely change, to increase the chance of a common layer
# across releases
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
RUN apt-get update && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
...@@ -95,31 +95,17 @@ make -j
Ollama currently vendors [llama.cpp](https://github.com/ggerganov/llama.cpp/) and [ggml](https://github.com/ggerganov/ggml) through a vendoring model. While we generally strive to contribute changes back upstream to avoid drift, we carry a small set of patches which are applied to the tracking commit. A set of make targets is available to aid developers in updating to a newer tracking commit, or to work on changes.
> [!IMPORTANT]
> Prior to merging #7157 we continue to leverage a submodule for llama.cpp which establishes the tracking commit. After merging that PR a new manifest file will be utilized
If you update the vendoring code, start by running the following command to establish the tracking llama.cpp repo in the `./vendor/` directory.
```
make apply-patches
```
### Updating Base Commit
**Pin to new base commit**
To update to a newer base commit, select the upstream git tag or commit and update `llama/vendoring.env`
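The pin itself is a single assignment; the Makefile changes later in this diff include it as `llama/vendoring`. For example (placeholder SHA):
```
LLAMACPP_BASE_COMMIT=<new upstream commit sha>
```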
> [!IMPORTANT]
> After merging #7157 a manifest will be used instead of the submodule
```
cd llm/llama.cpp
git fetch
git checkout NEW_BASE_COMMIT
cd ..
git add llama.cpp
```
#### Applying patches
...@@ -128,13 +114,13 @@ When updating to a newer base commit, the existing patches may not apply cleanly
Start by applying the patches. If any of the patches have conflicts, the `git am` will stop at the first failure.
```
make apply-patches
```
If you see an error message about a conflict, go into the `./vendor/` directory and perform merge resolution with your preferred tool on the patch commit which failed. Save the file(s) and continue the patch series with `git am --continue`. If any additional patches fail, follow the same pattern until the full patch series is applied. Once finished, run a final `create-patches` and `sync` target to ensure everything is updated.
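For the resolution loop itself, a minimal sketch (paths are illustrative), followed by the final targets shown below:
```
cd vendor
# fix the conflicted files, then stage them and continue the series
git add <resolved files>
git am --continue
cd ..
```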
```
make create-patches sync
```
Build and test Ollama, and make any necessary changes to the Go code based on the new base commit. Submit your PR to the Ollama repo.
...@@ -144,14 +130,14 @@ Build and test Ollama, and make any necessary changes to the Go code based on th
When working on new fixes or features that impact vendored code, use the following model. First get a clean tracking repo with all current patches applied:
```
make apply-patches
```
Now edit the upstream native code in the `./vendor/` directory. You do not need to commit every change in order to build; a dirty working tree in the tracking repo is OK while developing. Simply save in your editor, and run the following to refresh the vendored code with your changes, build the backend(s), and build ollama:
```
make sync
make -j 8
go build .
```
...@@ -161,7 +147,7 @@ go build . ...@@ -161,7 +147,7 @@ go build .
Iterate until you're ready to submit PRs. Once your code is ready, commit a change in the `./vendor/` directory, then generate the patches for ollama with Iterate until you're ready to submit PRs. Once your code is ready, commit a change in the `./vendor/` directory, then generate the patches for ollama with
``` ```
make -C llama create-patches make create-patches
``` ```
> [!IMPORTANT] > [!IMPORTANT]
......
package llama
//go:generate make -j 8
/*
#cgo CFLAGS: -O2 -std=c11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE
#cgo CXXFLAGS: -O2 -std=c++11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE
......
# Helpers for managing our vendored llama.cpp repo and patch set
REPO_ROOT:=$(dir $(patsubst %/,%,$(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))))
DST_DIR:=$(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))
include $(REPO_ROOT)llama/vendoring
LLAMACPP_REPO := $(REPO_ROOT)llama/vendor/
LLAMACPP_PATCH_DIR := $(DST_DIR)patches/
......
LLAMACPP_BASE_COMMIT=3f1ae2e32cde00c39b96be6d01c2997c29bae555
\ No newline at end of file
set(TARGET ollama_llama_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_executable(${TARGET} server.cpp utils.hpp httplib.h)
install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS})
if (WIN32)
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
target_link_options(${TARGET} PRIVATE -municode -Wl,/subsystem:console)
endif()
target_compile_features(${TARGET} PRIVATE cxx_std_11)
\ No newline at end of file
# common logic across linux and darwin
init_vars() {
case "${GOARCH}" in
"amd64")
ARCH="x86_64"
;;
"arm64")
ARCH="arm64"
;;
*)
echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
exit 1
;;
esac
LLAMACPP_DIR=../llama.cpp
CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on"
CMAKE_TARGETS="--target ollama_llama_server"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
else
# TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"
NO_WHOLE_ARCHIVE=""
GCC_ARCH="-arch ${ARCH}"
DIST_BASE=../../dist/darwin-${GOARCH}/
PAYLOAD_BASE=../../build/darwin/${GOARCH}
;;
"Linux")
LIB_EXT="so"
WHOLE_ARCHIVE="-Wl,--whole-archive"
NO_WHOLE_ARCHIVE="-Wl,--no-whole-archive"
# Cross compiling not supported on linux - Use docker
GCC_ARCH=""
DIST_BASE=../../dist/linux-${GOARCH}/
PAYLOAD_BASE=../../build/linux/${GOARCH}
;;
*)
;;
esac
if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
fi
GZIP=$(command -v pigz 2>/dev/null || echo "gzip")
RUNNER_BASE="${DIST_BASE}/lib/ollama/runners"
}
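# Initialize or refresh the llama.cpp submodule; skipped entirely when OLLAMA_SKIP_PATCHING is set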
git_module_setup() {
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule initialization"
return
fi
# Make sure the tree is clean after the directory moves
if [ -d "${LLAMACPP_DIR}/gguf" ]; then
echo "Cleaning up old submodule"
rm -rf ${LLAMACPP_DIR}
fi
git submodule init
git submodule update --force ${LLAMACPP_DIR}
}
apply_patches() {
# apply temporary patches until fix is upstream
for patch in ../patches/*.patch; do
git -c 'user.name=nobody' -c 'user.email=<>' -C ${LLAMACPP_DIR} am ${patch}
done
}
build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
# remove unnecessary build artifacts
rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
}
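# Copy the built runner binaries (and any shared libraries) from $BUILD_DIR into the runners dist dir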
dist() {
[ -z "${RUNNER}" ] && exit 1
mkdir -p ${RUNNER_BASE}/${RUNNER}/
for f in ${BUILD_DIR}/bin/* ; do
cp ${f} ${RUNNER_BASE}/${RUNNER}/
done
# check for lib directory
if [ -d ${BUILD_DIR}/lib ]; then
for f in ${BUILD_DIR}/lib/* ; do
cp ${f} ${RUNNER_BASE}/${RUNNER}/
done
fi
}
# Compress from the build $BUILD_DIR into the $PAYLOAD_BASE/$RUNNER dir
compress() {
[ -z "${RUNNER}" ] && exit 1
echo "Compressing payloads with ${GZIP} to reduce overall binary size..."
rm -rf "${PAYLOAD_BASE}/${RUNNER}/"
mkdir -p "${PAYLOAD_BASE}/${RUNNER}/"
for f in ${BUILD_DIR}/bin/* ; do
${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" &
compress_pids+=" $!"
done
# check for lib directory
if [ -d ${BUILD_DIR}/lib ]; then
for f in ${BUILD_DIR}/lib/* ; do
${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" &
compress_pids+=" $!"
done
fi
echo
}
wait_for_compress() {
for pid in ${compress_pids}; do
wait $pid
done
echo "Finished compression"
}
install() {
echo "Installing libraries to bin dir ${BUILD_DIR}/bin/"
for lib in $(find ${BUILD_DIR} -name \*.${LIB_EXT} | grep -v "${BUILD_DIR}/bin/" ); do
rm -f "${BUILD_DIR}/bin/$(basename ${lib})"
cp -af "${lib}" "${BUILD_DIR}/bin/"
done
}
# Keep the local tree clean after we're done with the build
cleanup() {
git submodule update --force ${LLAMACPP_DIR}
}
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be ./llm/generate/
# TODO - add hardening to detect missing tools (cmake, etc.)
set -ex
set -o pipefail
compress_pids=""
echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches
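# Codesign the runner binary when an APPLE_IDENTITY is provided in the environment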
sign() {
if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp --deep --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama $1
fi
}
COMMON_DARWIN_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_OSX_DEPLOYMENT_TARGET=11.3 -DGGML_METAL_MACOSX_VERSION_MIN=11.3 -DCMAKE_SYSTEM_NAME=Darwin -DGGML_METAL_EMBED_LIBRARY=on -DGGML_OPENMP=off"
case "${GOARCH}" in
"amd64")
COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DGGML_METAL=off -DGGML_NATIVE=off"
if [ -z "$OLLAMA_SKIP_CPU_GENERATE" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
RUNNER=cpu
BUILD_DIR="../build/darwin/${GOARCH}/${RUNNER}"
echo "Building LCD CPU"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
RUNNER=cpu_avx
BUILD_DIR="../build/darwin/${GOARCH}/${RUNNER}"
echo "Building AVX CPU"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=on -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
RUNNER=cpu_avx2
BUILD_DIR="../build/darwin/${GOARCH}/${RUNNER}"
echo "Building AVX2 CPU"
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
fi
;;
"arm64")
if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then
init_vars
CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}"
RUNNER="metal"
BUILD_DIR="../build/darwin/${GOARCH}/${RUNNER}"
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
fi
;;
*)
echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
exit 1
;;
esac
cleanup
wait_for_compress
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"