Unverified Commit 3c8df380 authored by Daniel Hiltgen, committed by GitHub

Merge pull request #2885 from dhiltgen/rocm_v6_only

Revamp ROCm support
parents 7d564835 6c5ccb11
@@ -179,17 +179,21 @@ fi
 if [ -d "${ROCM_PATH}" ]; then
     echo "ROCm libraries detected - building dynamic ROCm library"
-    if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
-        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
+    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
+        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
     fi
     init_vars
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
     BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
-    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
+    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
     build
-    # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
-    # them being present at runtime on the host
+    # Record the ROCM dependencies
+    rm -f "${BUILD_DIR}/lib/deps.txt"
+    touch "${BUILD_DIR}/lib/deps.txt"
+    for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
+        echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
+    done
     compress_libs
 fi
...
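The deps.txt step pairs with the new $ORIGIN/../rocm/ rpath: instead of embedding the large ROCm runtime, the build walks ldd output for libext_server.so and records the ROCm, AMDGPU, and libtinfo library paths so packaging can bundle exactly those files. A minimal Go sketch of the same filtering, for illustration only (the build does this in shell; the function name and argument path here are hypothetical):

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// rocmDeps mirrors the shell pipeline above: run ldd, take the resolved
// paths, and keep only the ROCm/AMDGPU/libtinfo entries worth bundling.
func rocmDeps(lib string) ([]string, error) {
	out, err := exec.Command("ldd", lib).Output()
	if err != nil {
		return nil, err
	}
	var deps []string
	for _, line := range strings.Split(string(out), "\n") {
		// ldd lines look like: "librocblas.so.4 => /opt/rocm/lib/librocblas.so.4 (0x...)"
		parts := strings.Split(line, "=>")
		if len(parts) < 2 {
			continue
		}
		fields := strings.Fields(parts[1])
		if len(fields) == 0 {
			continue
		}
		p := fields[0]
		if strings.Contains(p, "rocm") || strings.Contains(p, "amdgpu") || strings.Contains(p, "libtinfo") {
			deps = append(deps, p)
		}
	}
	return deps, nil
}

func main() {
	deps, err := rocmDeps("libext_server.so") // hypothetical local path
	if err != nil {
		panic(err)
	}
	for _, d := range deps {
		fmt.Println(d)
	}
}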
@@ -2,19 +2,52 @@
 $ErrorActionPreference = "Stop"
 
+function amdGPUs {
+    if ($env:AMDGPU_TARGETS) {
+        return $env:AMDGPU_TARGETS
+    }
+    # TODO - load from some common data file for linux + windows build consistency
+    $GPU_LIST = @(
+        "gfx900"
+        "gfx906:xnack-"
+        "gfx908:xnack-"
+        "gfx90a:xnack+"
+        "gfx90a:xnack-"
+        "gfx1010"
+        "gfx1012"
+        "gfx1030"
+        "gfx1100"
+        "gfx1101"
+        "gfx1102"
+    )
+    $GPU_LIST -join ';'
+}
+
 function init_vars {
+    # Verify the environment is a Developer Shell for MSVC 2019
+    write-host $env:VSINSTALLDIR
+    if (($env:VSINSTALLDIR -eq $null)) {
+        Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment"
+        exit 1
+    }
     $script:SRC_DIR = $(resolve-path "..\..\")
     $script:llamacppDir = "../llama.cpp"
-    $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A", "x64")
+    $script:cmakeDefs = @(
+        "-DBUILD_SHARED_LIBS=on",
+        "-DLLAMA_NATIVE=off"
+    )
     $script:cmakeTargets = @("ext_server")
     $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
-        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
+        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
         $script:config = "RelWithDebInfo"
     } else {
-        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
+        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
         $script:config = "Release"
     }
+    if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
+        $script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
+    }
     # Try to find the CUDA dir
     if ($env:CUDA_LIB_DIR -eq $null) {
         $d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -157,7 +190,7 @@ apply_patches
 $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
 write-host "Building LCD CPU"
 build
@@ -166,7 +199,7 @@ sign
 compress_libs
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
 write-host "Building AVX CPU"
 build
@@ -175,7 +208,7 @@ sign
 compress_libs
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
 write-host "Building AVX2 CPU"
 build
@@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) {
     }
     init_vars
     $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-    $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    write-host "Building CUDA"
     build
     install
     sign
     compress_libs
 }
-# TODO - actually implement ROCm support on windows
-$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
-rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
-md "${script:buildDir}/lib" -ea 0 > $null
-echo $null >> "${script:buildDir}/lib/.generated"
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+    }
+    init_vars
+    $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+    $script:cmakeDefs += @(
+        "-G", "Ninja",
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+    )
+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH"
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+    write-host "Building ROCm"
+    build
+    # Ninja doesn't prefix with config name
+    ${script:config}=""
+    install
+    if ($null -ne $script:DUMPBIN) {
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
+    }
+    sign
+    compress_libs
+}
 cleanup
 write-host "`ngo generate completed"
@@ -19,7 +19,7 @@ type LLM interface {
 	Close()
 }
 
-func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
 
-	return newLlmServer(info, workDir, model, adapters, projectors, opts)
+	return newLlmServer(info, model, adapters, projectors, opts)
 }
 
 // Give any native cgo implementations an opportunity to initialize
-func Init(workdir string) error {
-	return nativeInit(workdir)
+func Init() error {
+	return nativeInit()
 }
 
-func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	dynLibs := getDynLibs(gpuInfo)
 
 	// Check to see if the user has requested a specific library instead of auto-detecting
@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto
 	_, err := os.Stat(dynLibs[0])
 	if err != nil {
 		slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0]))
-		err = nativeInit(workDir)
+		err = nativeInit()
 		if err != nil {
 			return nil, err
 		}
...
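With workDir gone from the public API, callers initialize once and then load models, and the library resolves its own assets directory internally (via gpu.AssetsDir(), shown below). A minimal sketch of an updated call site, assuming only the signatures above (the package name and nil adapter/projector lists are placeholders):

package example

import (
	"fmt"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
)

// loadModel mirrors the updated call sites: no work directory is threaded
// through; payload extraction happens inside llm.Init and llm.New.
func loadModel(modelPath string, opts api.Options) (llm.LLM, error) {
	if err := llm.Init(); err != nil {
		return nil, fmt.Errorf("unable to initialize llm library %w", err)
	}
	// nil adapter and projector lists; real callers pass model metadata here.
	return llm.New(modelPath, nil, nil, opts)
}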
@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool {
 	return false
 }
 
-func nativeInit(workdir string) error {
+func nativeInit() error {
 	slog.Info("Extracting dynamic libraries...")
+	assetsDir, err := gpu.AssetsDir()
+	if err != nil {
+		return err
+	}
 	if runtime.GOOS == "darwin" {
-		err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
+		err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
 		if err != nil {
 			if err == payloadMissing {
 				// TODO perhaps consider this a hard failure on arm macs?
@@ -115,10 +119,10 @@ func nativeInit(workdir string) error {
 			}
 			return err
 		}
-		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
+		os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
 	}
 
-	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
+	libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
 	if err != nil {
 		if err == payloadMissing {
 			slog.Info(fmt.Sprintf("%s", payloadMissing))
@@ -149,17 +153,13 @@ func nativeInit(workdir string) error {
 	return nil
 }
 
-func extractDynamicLibs(workDir, glob string) ([]string, error) {
+func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
 	files, err := fs.Glob(libEmbed, glob)
 	if err != nil || len(files) == 0 {
 		return nil, payloadMissing
 	}
 	libs := []string{}
 
-	// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
-	// and tracking by version so we don't reexpand the files every time
-	// Also maybe consider lazy loading only what is needed
 	g := new(errgroup.Group)
 	for _, file := range files {
 		pathComps := strings.Split(file, "/")
@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 		g.Go(func() error {
 			// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
 			// Include the variant in the path to avoid conflicts between multiple server libs
-			targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
+			targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
 			srcFile, err := libEmbed.Open(file)
 			if err != nil {
 				return fmt.Errorf("read payload %s: %v", file, err)
 			}
 			defer srcFile.Close()
 			if err := os.MkdirAll(targetDir, 0o755); err != nil {
-				return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+				return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
 			}
 			src := io.Reader(srcFile)
 			filename := file
@@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 				libs = append(libs, destFile)
 			}
 
-			_, err = os.Stat(destFile)
-			switch {
-			case errors.Is(err, os.ErrNotExist):
-				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-				if err != nil {
-					return fmt.Errorf("write payload %s: %v", file, err)
-				}
-				defer destFile.Close()
-				if _, err := io.Copy(destFile, src); err != nil {
-					return fmt.Errorf("copy payload %s: %v", file, err)
-				}
-			case err != nil:
-				return fmt.Errorf("stat payload %s: %v", file, err)
+			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+			if err != nil {
+				return fmt.Errorf("write payload %s: %v", file, err)
+			}
+			defer destFp.Close()
+			if _, err := io.Copy(destFp, src); err != nil {
+				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 			return nil
 		})
@@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 	return libs, g.Wait()
 }
 
-func extractPayloadFiles(workDir, glob string) error {
+func extractPayloadFiles(assetsDir, glob string) error {
 	files, err := fs.Glob(libEmbed, glob)
 	if err != nil || len(files) == 0 {
 		return payloadMissing
@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error {
 			return fmt.Errorf("read payload %s: %v", file, err)
 		}
 		defer srcFile.Close()
-		if err := os.MkdirAll(workDir, 0o755); err != nil {
-			return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+		if err := os.MkdirAll(assetsDir, 0o755); err != nil {
+			return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
 		}
 		src := io.Reader(srcFile)
 		filename := file
@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error {
 			filename = strings.TrimSuffix(filename, ".gz")
 		}
 
-		destFile := filepath.Join(workDir, filepath.Base(filename))
+		destFile := filepath.Join(assetsDir, filepath.Base(filename))
 		_, err = os.Stat(destFile)
 		switch {
 		case errors.Is(err, os.ErrNotExist):
-			destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
 			if err != nil {
 				return fmt.Errorf("write payload %s: %v", file, err)
 			}
-			defer destFile.Close()
-			if _, err := io.Copy(destFile, src); err != nil {
+			defer destFp.Close()
+			if _, err := io.Copy(destFp, src); err != nil {
 				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 		case err != nil:
 			return fmt.Errorf("stat payload %s: %v", file, err)
+		case err == nil:
+			slog.Debug("payload already exists: " + destFile)
 		}
 	}
 	return nil
...
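Note the asymmetry these two hunks introduce: extractDynamicLibs now unconditionally truncates and rewrites each library, while extractPayloadFiles still skips files that already exist (and now logs that case). A generic sketch of the two behaviors side by side, using a hypothetical helper name:

package payload

import (
	"errors"
	"fmt"
	"io"
	"log/slog"
	"os"
)

// extractOne writes src to destFile. When overwrite is false and destFile
// already exists, extraction is skipped -- the behavior extractPayloadFiles
// keeps; extractDynamicLibs now effectively always passes overwrite=true.
func extractOne(src io.Reader, destFile string, overwrite bool) error {
	if !overwrite {
		_, err := os.Stat(destFile)
		switch {
		case err == nil:
			slog.Debug("payload already exists: " + destFile)
			return nil
		case !errors.Is(err, os.ErrNotExist):
			return fmt.Errorf("stat payload %s: %v", destFile, err)
		}
	}
	destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
	if err != nil {
		return fmt.Errorf("write payload %s: %v", destFile, err)
	}
	defer destFp.Close()
	if _, err := io.Copy(destFp, src); err != nil {
		return fmt.Errorf("copy payload %s: %v", destFile, err)
	}
	return nil
}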
@@ -4,5 +4,5 @@ import (
 	"embed"
 )
 
-//go:embed llama.cpp/build/linux/*/*/lib/*.so*
+//go:embed llama.cpp/build/linux/*/*/lib/*
 var libEmbed embed.FS
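The widened pattern matters because the lib directories now carry more than shared objects: the deps.txt manifests written by gen_linux.sh are embedded alongside the compressed libraries, and nativeInit picks everything up with the matching fs.Glob call shown earlier. A standalone sketch of that enumeration, assuming an fs.FS such as the libEmbed above:

package payload

import "io/fs"

// listPayloads enumerates everything under the embedded lib directories.
// With the widened //go:embed pattern, this now returns the deps.txt
// manifests as well as the compressed .so files.
func listPayloads(assets fs.FS) ([]string, error) {
	return fs.Glob(assets, "llama.cpp/build/*/*/*/lib/*")
}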
@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do
         .
     docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
     docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
+    docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/
     docker rm builder-$TARGETARCH
 done
@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute
 // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
 func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
-	workDir := c.GetString("workDir")
-
 	needLoad := loaded.runner == nil || // is there a model loaded?
 		loaded.ModelPath != model.ModelPath || // has the base model changed?
 		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
 		loaded.Options = nil
 	}
 
-	llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
+	llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
 	if err != nil {
 		// some older models are not compatible with newer versions of llama.cpp
 		// show a generalized compatibility error until there is a better way to
@@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error {
 		os.Exit(0)
 	}()
 
-	if err := llm.Init(s.WorkDir); err != nil {
+	if err := llm.Init(); err != nil {
 		return fmt.Errorf("unable to initialize llm library %w", err)
 	}
 
 	if runtime.GOOS == "linux" { // TODO - windows too
...