"megatron/vscode:/vscode.git/clone" did not exist on "99a0c39ea23936072cfd7fb1ce1fd2b8298e9b20"
Unverified Commit dcfb7a10 authored by Michael Yang's avatar Michael Yang Committed by GitHub
Browse files

next build (#8539)



* add build to .dockerignore

* test: only build one arch

* add build to .gitignore

* fix ccache path

* filter amdgpu targets

* only filter if autodetecting

* Don't clobber gpu list for default runner

This ensures the GPU specific environment variables are set properly

* explicitly set CXX compiler for HIP

* Update build_windows.ps1

This isn't complete, but is close.  Dependencies are missing, and it only builds the "default" preset.

* build: add ollama subdir

* add .git to .dockerignore

* docs: update development.md

* update build_darwin.sh

* remove unused scripts

* llm: add cwd and build/lib/ollama to library paths

* default DYLD_LIBRARY_PATH to LD_LIBRARY_PATH in runner on macOS

* add additional cmake output vars for msvc

* interim edits to make server detection logic work with dll directories like lib/ollama/cuda_v12

* remove unnecessary filepath.Dir, cleanup

* add hardware-specific directory to path

* use absolute server path

* build: linux arm

* cmake install targets

* remove unused files

* ml: visit each library path once

* build: skip cpu variants on arm

* build: install cpu targets

* build: fix workflow

* shorter names

* fix rocblas install

* docs: clean up development.md

* consistent build dir removal in development.md

* silence -Wimplicit-function-declaration build warnings in ggml-cpu

* update readme

* update development readme

* llm: update library lookup logic now that there is one runner (#8587)

* tweak development.md

* update docs

* add windows cuda/rocm tests

---------
Co-authored-by: jmorganca <jmorganca@gmail.com>
Co-authored-by: Daniel Hiltgen <daniel@ollama.com>
parent 2ef3c803
package ggml
// #cgo CPPFLAGS: -DGGML_USE_METAL -DGGML_USE_BLAS
// #cgo LDFLAGS: -framework Foundation
import "C"
import (
_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-blas"
_ "github.com/ollama/ollama/ml/backend/ggml/ggml/src/ggml-metal"
)
//go:build debug
package ggml
// #cgo CPPFLAGS: -DOLLAMA_DEBUG
import "C"
package runners
import (
"log/slog"
"os"
"path/filepath"
"runtime"
"slices"
"strings"
"sync"
"golang.org/x/sys/cpu"
"github.com/ollama/ollama/envconfig"
)
var (
runnersDir = ""
once = sync.Once{}
)
// CPUCapability enumerates the CPU vector-extension levels that runner
// variants are keyed on.
type CPUCapability uint32

// Override at build time when building base GPU runners
// var GPURunnerCPUCapability = CPUCapabilityAVX

// Known capability levels; the zero value means no vector extensions.
const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

// String returns the variant suffix for the capability ("avx" or "avx2").
// Every other value, including CPUCapabilityNone, yields
// "no vector extensions".
func (c CPUCapability) String() string {
	if c == CPUCapabilityAVX2 {
		return "avx2"
	}
	if c == CPUCapabilityAVX {
		return "avx"
	}
	return "no vector extensions"
}
// GetCPUCapability reports the highest vector-extension level that the
// cpu.X86 feature flags advertise: AVX2 is preferred over AVX, and
// CPUCapabilityNone is returned when neither flag is set.
func GetCPUCapability() CPUCapability {
	switch {
	case cpu.X86.HasAVX2:
		return CPUCapabilityAVX2
	case cpu.X86.HasAVX:
		return CPUCapabilityAVX
	default:
		// Lowest common denominator.
		return CPUCapabilityNone
	}
}
// Locate returns the directory in which dynamic runners were found.
// An empty string means no runner directory was located and only the
// builtin runner is present. The search itself runs at most once; later
// calls return the remembered result.
func Locate() string {
	once.Do(locateRunnersOnce)
	dir := runnersDir
	return dir
}
// locateRunnersOnce probes a prioritized list of directories and stores
// the first one that exists in runnersDir:
//  1. local build tree next to the executable
//     (llama/build/<GOOS>-<GOARCH>/runners)
//  2. lib/ollama/runners under envconfig.LibRelativeToExe(), relative to
//     the executable
//  3. lib/ollama/runners directly next to the executable
//
// When none of them exists, runnersDir is left empty so that only the
// builtin runner is used. A failure to resolve the executable path is
// logged at debug level and the probing still proceeds.
func locateRunnersOnce() {
	exe, err := os.Executable()
	if err != nil {
		slog.Debug("runner locate", "error", err)
	}

	exeDir := filepath.Dir(exe)
	candidates := [...]string{
		filepath.Join(exeDir, "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
		filepath.Join(exeDir, envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
		filepath.Join(exeDir, "lib", "ollama", "runners"),
	}

	for i := range candidates {
		if _, statErr := os.Stat(candidates[i]); statErr != nil {
			continue
		}
		runnersDir = candidates[i]
		slog.Debug("runners located", "dir", runnersDir)
		return
	}

	// Nothing on disk: fall back to the builtin runner only.
	slog.Debug("no dynamic runners detected, using only built-in")
	runnersDir = ""
}
// BuiltinName returns the well-known name of the builtin runner for the
// platform this binary was compiled for: "metal" on darwin/arm64 and
// "cpu" everywhere else.
func BuiltinName() string {
	if runtime.GOOS != "darwin" || runtime.GOARCH != "arm64" {
		return "cpu"
	}
	return "metal"
}
// GetAvailableServers returns the runners usable on this host as a map
// from runner name to executable path.
//
// The builtin runner (see BuiltinName) maps to the current executable
// whenever os.Executable succeeds. Dynamic runners are discovered by
// globbing "<runnersDir>/*/ollama_*"; the name of a runner is the name of
// the directory that contains its executable.
//
// Directory names are the name of the runner and may contain an optional
// variant prefixed with '_' as the separator. For example, "cuda_v11" and
// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
// lowest common denominator.
//
// The returned map is never nil. If the glob pattern is rejected, the
// entries collected so far (the builtin runner) are still returned rather
// than being discarded.
func GetAvailableServers() map[string]string {
	once.Do(locateRunnersOnce)

	servers := make(map[string]string)
	exe, err := os.Executable()
	if err == nil {
		servers[BuiltinName()] = exe
	}

	if runnersDir == "" {
		return servers
	}

	// glob runnersDir for files that start with ollama_
	pattern := filepath.Join(runnersDir, "*", "ollama_*")
	files, err := filepath.Glob(pattern)
	if err != nil {
		// The builtin runner does not depend on the runner directory, so
		// keep it available even when the directory cannot be scanned.
		slog.Debug("could not glob", "pattern", pattern, "error", err)
		return servers
	}

	for _, file := range files {
		slog.Debug("availableServers : found", "file", file)
		runnerName := filepath.Base(filepath.Dir(file))

		// Special case for our GPU runners - if compiled with standard AVX flag
		// detect incompatible system
		// Custom builds will omit this and its up to the user to ensure compatibility
		parsed := strings.Split(runnerName, "_")
		if len(parsed) == 3 && parsed[2] == "avx" && !cpu.X86.HasAVX {
			slog.Info("GPU runner incompatible with host system, CPU does not have AVX", "runner", runnerName)
			continue
		}
		servers[runnerName] = file
	}

	return servers
}
// ServersForGpu returns the names of the runners compatible with the
// requested GPU library/variant (for example "cuda_v12"), in the order in
// which they should be tried:
//
//  1. The runner matching the request. A runner literally named requested
//     is preferred; otherwise the first runner (in sorted order) whose
//     three-part name, with its trailing component such as "_avx" removed,
//     equals requested.
//  2. If nothing matched, every runner of the same library (the part of
//     the name before the first '_'), sorted by name.
//  3. The CPU runner for the best detected CPU capability, if available.
//  4. The builtin runner.
//
// When the builtin runner is the only one available, the result contains
// just that name.
func ServersForGpu(requested string) []string {
	availableServers := GetAvailableServers()

	// Short circuit if the only option is built-in
	if _, ok := availableServers[BuiltinName()]; ok && len(availableServers) == 1 {
		return []string{BuiltinName()}
	}

	// Map iteration order is unspecified, so walk the names in sorted order
	// to make the selection below reproducible from run to run.
	names := make([]string, 0, len(availableServers))
	for name := range availableServers {
		names = append(names, name)
	}
	slices.Sort(names)

	requestedLib := strings.Split(requested, "_")[0]
	servers := []string{}

	// exact match first
	if _, ok := availableServers[requested]; ok {
		servers = append(servers, requested)
	} else {
		for _, name := range names {
			parsed := strings.Split(name, "_")
			// Strip off optional _avx for comparison
			if len(parsed) == 3 && parsed[0]+"_"+parsed[1] == requested {
				servers = append(servers, name)
				break
			}
		}
	}

	// If no exact match, then try without variant. No name can equal
	// requested at this point, otherwise it would have matched above.
	if len(servers) == 0 {
		for _, name := range names {
			if requestedLib == strings.Split(name, "_")[0] {
				servers = append(servers, name)
			}
		}
	}

	// Finally append the best CPU option if found, then builtin
	if bestCPUVariant := GetCPUCapability(); bestCPUVariant != CPUCapabilityNone {
		cpuRunner := "cpu_" + bestCPUVariant.String()
		if _, ok := availableServers[cpuRunner]; ok {
			servers = append(servers, cpuRunner)
		}
	}
	servers = append(servers, BuiltinName())
	return servers
}
// ServerForCpu returns the name of the best runner for CPU-only inference.
// On darwin/arm64 this is always the builtin runner. Elsewhere it is the
// "cpu_<variant>" runner matching the detected CPU capability when such a
// runner is available, and the builtin runner otherwise.
func ServerForCpu() string {
	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
		return BuiltinName()
	}

	variant := GetCPUCapability()
	availableServers := GetAvailableServers()
	if variant == CPUCapabilityNone {
		return BuiltinName()
	}

	candidate := "cpu_" + variant.String()
	if _, found := availableServers[candidate]; found {
		return candidate
	}
	return BuiltinName()
}
#!/bin/sh

# Release build driver: takes exactly one argument, the version, exports it
# as VERSION and then runs the darwin, linux and docker build scripts that
# live in the same directory as this script. Any failing step aborts the
# run (set -e); referencing an unset variable is an error (set -u).

set -eu

# usage prints the expected invocation and exits with status 1.
usage() {
    echo "usage: $(basename "$0") VERSION"
    exit 1
}

[ "$#" -eq 1 ] || usage

export VERSION="$1"

# $0 and the derived directory are quoted so the script keeps working when
# it is invoked through a path that contains whitespace.

# build universal MacOS binary
sh "$(dirname "$0")/build_darwin.sh"

# build arm64 and amd64 Linux binaries
sh "$(dirname "$0")/build_linux.sh"

# build arm64 and amd64 Docker images
sh "$(dirname "$0")/build_docker.sh"
......@@ -2,55 +2,92 @@
set -e
. $(dirname $0)/env.sh
mkdir -p dist
# These require Xcode v13 or older to target MacOS v11
# If installed to an alternate location use the following to enable
# export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
# export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
export CGO_CFLAGS=-mmacosx-version-min=11.3
export CGO_CXXFLAGS=-mmacosx-version-min=11.3
export CGO_LDFLAGS=-mmacosx-version-min=11.3
rm -rf llama/build dist/darwin-*
# Generate the universal ollama binary for stand-alone usage: metal + avx
echo "Building binary"
echo "Building darwin arm64"
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
echo "Building darwin amd64 with AVX enabled"
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
# sign the binary and rename it
if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama-darwin
else
echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
fi
ditto -c -k --keepParent dist/ollama-darwin dist/temp.zip
if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi
rm -f dist/temp.zip
# Build the app bundle
echo "Building app"
echo "Building darwin amd64 with runners"
rm dist/darwin-amd64/bin/ollama
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
# Generate the universal ollama binary for the app bundle: metal + no-avx
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
# build and optionally sign the mac app
npm install --prefix macapp
if [ -n "$APPLE_IDENTITY" ]; then
npm run --prefix macapp make:sign
else
npm run --prefix macapp make
# status writes its arguments to stderr, prefixed with ">>> ".
status() {
    echo ">>> $@" >&2
}
# usage prints the supported invocation and exits with status 1.
# The synopsis lists every option parsed by getopts (-a, -h) and every step
# the dispatcher accepts (build, sign, macapp); with no step, all three run.
usage() {
    echo "usage: $(basename "$0") [-a ARCHS] [-h] [build] [sign] [macapp]"
    exit 1
}
# VERSION may be supplied by the caller; otherwise it defaults to the output
# of `git describe --tags --dirty`.
export VERSION=${VERSION:-$(git describe --tags --dirty)}
# Linker flags for every `go build`: -w -s, plus -X assignments that set
# version.Version to VERSION with a leading "v" removed (${VERSION#v}) and
# server.mode to "release". The outer single quotes keep the whole -ldflags
# value together as one GOFLAGS entry.
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${VERSION#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
# Minimum macOS version passed to the C preprocessor for cgo builds.
export CGO_CPPFLAGS='-mmacosx-version-min=11.3'
# Architectures to build; override with -a (space-separated list).
ARCHS="arm64 amd64"
# Options: -a ARCHS replaces the architecture list, -h prints usage.
while getopts "a:h" OPTION; do
case $OPTION in
a) ARCHS=$OPTARG ;;
h) usage ;;
esac
done
# Drop the parsed options so that "$@" holds only the requested steps.
shift $(( $OPTIND - 1 ))
# _build_darwin builds ollama for every architecture listed in $ARCHS.
# For each one it runs `go build` with cgo enabled, writing the result into
# dist/darwin-$ARCH/. For amd64 only, it additionally configures a cmake
# build tree in build/darwin-amd64, builds the ggml-cpu target and copies
# the produced libraries into the same dist directory.
# Note: ARCH and INSTALL_PREFIX are plain (global) shell variables.
_build_darwin() {
for ARCH in $ARCHS; do
status "Building darwin $ARCH"
# The trailing slash makes `go build -o` treat this as a directory.
INSTALL_PREFIX=dist/darwin-$ARCH/
GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 go build -o $INSTALL_PREFIX .
if [ "$ARCH" = "amd64" ]; then
status "Building darwin $ARCH dynamic backends"
cmake -B build/darwin-$ARCH \
-DCMAKE_OSX_ARCHITECTURES=x86_64 \
-DCMAKE_OSX_DEPLOYMENT_TARGET=11.3
cmake --build build/darwin-$ARCH --target ggml-cpu -j
# NOTE(review): *.{dylib,so} relies on brace expansion, which a strictly
# POSIX sh does not provide - confirm the interpreter this script runs under.
install build/darwin-$ARCH/lib/ollama/*.{dylib,so} $INSTALL_PREFIX
fi
done
}
# _sign_darwin merges the per-architecture binaries into a universal binary
# and, when APPLE_IDENTITY is set, signs and notarizes it and packages the
# result as dist/ollama-darwin.tgz.
#
# Inputs are read from dist/darwin-$ARCH/, the layout _build_darwin writes:
# the ollama binaries from dist/darwin-*/ollama and the amd64 backend
# libraries from dist/darwin-amd64/lib*. The universal binary is written to
# dist/darwin/ollama.
#
# Environment: APPLE_IDENTITY (signing identity; signing, notarization and
# packaging are skipped when empty), APPLE_ID, APPLE_PASSWORD and
# APPLE_TEAM_ID (notarization credentials).
_sign_darwin() {
    status "Creating universal binary..."
    # lipo does not create the output directory.
    mkdir -p dist/darwin
    lipo -create -output dist/darwin/ollama dist/darwin-*/ollama

    if [ -z "$APPLE_IDENTITY" ]; then
        status "No APPLE_IDENTITY set, skipping code signing"
        return
    fi

    for F in dist/darwin/ollama dist/darwin-amd64/lib*; do
        codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime "$F"
    done

    # create a temporary zip for notarization
    TEMP=$(mktemp -u).zip
    ditto -c -k --keepParent dist/darwin/ollama "$TEMP"
    # Submit the archive that was just created, not a fixed path.
    xcrun notarytool submit "$TEMP" --wait --timeout 10m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
    rm -f "$TEMP"

    # create a universal tarball; --strip-components removes the leading
    # dist/<dir>/ so entries sit at the top level of the archive
    tar -cf dist/ollama-darwin.tar --strip-components 2 dist/darwin/ollama
    tar -rf dist/ollama-darwin.tar --strip-components 2 dist/darwin-amd64/lib*
    gzip -9vc <dist/ollama-darwin.tar >dist/ollama-darwin.tgz
}
# _build_macapp installs the macapp npm dependencies and runs its "make"
# script, or "make:sign" when APPLE_IDENTITY is set, then moves the
# universal zip for $VERSION to dist/Ollama-darwin.zip.
_build_macapp() {
    npm install --prefix macapp
    if [ -z "$APPLE_IDENTITY" ]; then
        # unsigned build
        npm run --prefix macapp make
    else
        # signed build
        npm run --prefix macapp make:sign
    fi
    mv ./macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
}
# With no step given on the command line, run the whole pipeline in order
# (build, sign, mac app) and stop.
if [ "$#" -eq 0 ]; then
_build_darwin
_sign_darwin
_build_macapp
exit 0
fi
cp macapp/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# Run each requested step in the order given on the command line.
# Anything other than build, sign or macapp prints usage, which exits.
for CMD in "$@"; do
case $CMD in
build) _build_darwin ;;
sign) _sign_darwin ;;
macapp) _build_macapp ;;
*) usage ;;
esac
done
......@@ -18,7 +18,7 @@ docker buildx build \
--output type=local,dest=./dist/ \
--platform=${PLATFORM} \
${OLLAMA_COMMON_BUILD_ARGS} \
--target dist \
--target archive \
-f Dockerfile \
.
......@@ -26,4 +26,4 @@ docker buildx build \
if echo $PLATFORM | grep "," > /dev/null ; then
mv -f ./dist/linux_*64/ollama* ./dist/
rmdir ./dist/linux_*64
fi
\ No newline at end of file
fi
......@@ -80,18 +80,61 @@ function checkEnv() {
# buildOllama builds the native backend libraries (skipped when
# OLLAMA_SKIP_GENERATE is set) and then the ollama CLI with `go build`,
# copying the resulting ollama.exe into the dist directory.
# A failing cmake configure/build step or go build exits the script with
# that step's exit code.
function buildOllama() {
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
write-host "Building ollama runners"
Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
& make -j 12 dist
New-Item "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ItemType Directory -ea 0
# Default first, then conditionally ROCm and cuda v11
write-host "Building Default native backend libraries"
$env:CMAKE_GENERATOR="ninja"
& cmake --preset Default
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --build --preset Default -j 12
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# NOTE(review): unlike the configure and build steps above, the install
# step's exit code is not checked - confirm whether that is intended.
& cmake --install build -j 12
# TODO - add steps for v11 and ROCm
#
# if ("$script:CUDA_DIRS".Contains("v11") -and "$script:CUDA_DIRS".Contains("v12")) {
# # We assume the default is v12, so override for v11
# $origCUDA_PATH=$env:CUDA_PATH
# $hashEnv = @{}
# Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
# $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
# write-host "$v11"
# # $env:CUDA_PATH=$hashEnv[$v11]
# # $env:CUDACXX=$hashEnv[$v11]+"\bin\nvcc.exe"
# $env:CUDAToolkit_ROOT=$hashEnv[$v11]
# # ls env:
# write-host "Building CUDA v11 backend libraries"
# & cmake --preset "CUDA 11"
# $env:CUDA_PATH=$origCUDA_PATH
# exit(1)
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# # & cmake --build --preset "CUDA 11" -j 12
# # if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# }
# if ($env:HIP_PATH) {
# write-host "Building ROCm backend libraries"
# $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
# $env:HIP_PLATFORM="amd"
# $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
# & cmake --preset "ROCm"
# $env:HIPCXX=""
# $env:HIP_PLATFORM=""
# $env:CMAKE_PREFIX_PATH=""
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# & cmake --build --preset "ROCm" -j 12
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# }
} else {
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
}
write-host "Building ollama CLI"
# -s -w plus -X assignments for version.Version and server.mode=release.
& go build -trimpath -ldflags "-s -w -X=github.com/ollama/ollama/version.Version=$script:VERSION -X=github.com/ollama/ollama/server.mode=release" .
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\
cp .\ollama.exe "${script:DIST_DIR}\"
}
function buildApp() {
......
#!/bin/sh

# Wrapper script to speed up builds by disabling some permutations and reduce compatibility matrix
# Don't use for release builds, but suitable for local developer iteration
#
# Usage: ./scripts/fast.sh <build_script>
# Exports the variables below and then replaces this process with the
# given build script via exec.

# Only build cuda v12
export OLLAMA_SKIP_CUDA_11_GENERATE=1
# Major versions only
export CUDA_V12_ARCHITECTURES="60;70;80;90"
# Skip ROCm
export OLLAMA_SKIP_ROCM_GENERATE=1
# Disable various less common quants and fattn
export OLLAMA_FAST_BUILD=1

if [ $# -ne 1 ] ; then
    echo "Usage: ./scripts/fast.sh <build_script>"
    exit 1
fi

exec $1
\ No newline at end of file
# Set your variables here.
REPO="jmorganca/ollama"

# A release cannot be cut without a version; bail out early.
[[ -n "${VERSION}" ]] || {
    echo "VERSION is not set. Please set the VERSION environment variable."
    exit 1
}

# Run the build script that matches the host OS reported by Go.
OS=$(go env GOOS)
./script/build_${OS}.sh

# Tag the release unless the tag already exists, then publish the tag.
git rev-parse v$VERSION >/dev/null 2>&1 || git tag v$VERSION
git push origin v$VERSION

# Create the (pre-)release on GitHub.
gh release create -p v$VERSION -t v$VERSION

# Upload everything in dist/, replacing assets that already exist.
gh release upload v$VERSION ./dist/* --clobber
#!/bin/sh
# Script for common Dockerfile dependency installation in redhat linux based images
#
# Installs git, a GCC 10 toolchain, pigz, findutils and ccache. CMake and Go
# are installed only when CMAKE_VERSION / GOLANG_VERSION are set.
# Only CentOS and Rocky images are handled; any other distro aborts.
# -e: stop on the first failing command, -x: trace commands as they run.
set -ex
# A pipeline fails if any of its stages fails (matters for the curl | tar
# downloads below).
set -o pipefail
MACHINE=$(uname -m)
if grep -i "centos" /etc/system-release >/dev/null; then
# As of 7/1/2024 mirrorlist.centos.org has been taken offline, so adjust accordingly
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
# Centos 7 derivatives have too old of a git version to run our generate script
# uninstall and ignore failures
yum remove -y git
yum -y install epel-release centos-release-scl
# The release packages reinstate the mirrors, undo that again
sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
yum -y install dnf
# Newer git: from the IUS repository on x86_64, from the rh-git227
# collection (symlinked into /usr/local/bin) on other architectures.
if [ "${MACHINE}" = "x86_64" ]; then
yum -y install https://repo.ius.io/ius-release-el7.rpm
dnf install -y git236
else
dnf install -y rh-git227-git
ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git
fi
dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz findutils
elif grep -i "rocky" /etc/system-release >/dev/null; then
# Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC)
# Register the Rocky 8.5 vault AppStream repo so the pinned gcc-toolset
# packages below can be resolved.
cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo
[vault]
name=Rocky Vault
baseurl=https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/
gpgcheck=1
enabled=1
countme=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-rockyofficial
EOF
dnf install -y git \
gcc-toolset-10-gcc-10.2.1-8.2.el8 \
gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
findutils \
yum-utils \
pigz
else
echo "ERROR Unexpected distro"
exit 1
fi
# ccache: prebuilt release tarball on x86_64, distro package (via EPEL)
# on other architectures.
if [ "${MACHINE}" = "x86_64" ] ; then
curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /tmp --strip-components 1 && \
mv /tmp/ccache /usr/local/bin/
else
yum -y install epel-release
yum install -y ccache
fi
# Optional: unpack the requested CMake release directly into /usr.
if [ -n "${CMAKE_VERSION}" ]; then
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
fi
# Optional: unpack the requested Go release into /usr/local and expose go
# and gofmt via /usr/local/bin. Any machine other than x86_64 is treated
# as arm64.
if [ -n "${GOLANG_VERSION}" ]; then
if [ "${MACHINE}" = "x86_64" ]; then
GO_ARCH="amd64"
else
GO_ARCH="arm64"
fi
mkdir -p /usr/local
curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-${GO_ARCH}.tar.gz | tar xz -C /usr/local
ln -s /usr/local/go/bin/go /usr/local/bin/go
ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt
fi
......@@ -33,7 +33,6 @@ import (
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/model/mllama"
"github.com/ollama/ollama/openai"
"github.com/ollama/ollama/runners"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
......@@ -1259,14 +1258,6 @@ func Serve(ln net.Listener) error {
done()
}()
// Locate and log what runners are present at startup
var runnerNames []string
for v := range runners.GetAvailableServers() {
runnerNames = append(runnerNames, v)
}
slog.Info("Dynamic LLM libraries", "runners", runnerNames)
slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
s.sched.Run(schedCtx)
// At startup we retrieve GPU information so we can get log messages before loading a model
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment