Commit 380378cc authored by Daniel Hiltgen

Use our libraries first

Trying to live off the land for CUDA libraries was not the right strategy. We need to use the versions we compiled against to ensure things work properly.
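For context on why ordering matters here: the dynamic loader searches LD_LIBRARY_PATH (PATH on Windows) left to right and loads the first match, so whichever directory is listed first wins. A minimal sketch of the ordering this commit moves to, with hypothetical directory names (not from the commit):

```go
package main

import (
	"fmt"
	"path/filepath"
	"runtime"
	"strings"
)

func main() {
	pathEnv := "LD_LIBRARY_PATH"
	if runtime.GOOS == "windows" {
		pathEnv = "PATH"
	}

	// Hypothetical directories for illustration only.
	bundledDir := "/opt/ollama/runners/cuda_v11"
	systemPath := "/usr/lib" + string(filepath.ListSeparator) + "/usr/local/cuda/lib64"

	// Bundled dir first: the loader resolves the CUDA libraries the runner
	// was compiled against before any system copies are considered.
	paths := append([]string{bundledDir}, filepath.SplitList(systemPath)...)
	fmt.Println(pathEnv + "=" + strings.Join(paths, string(filepath.ListSeparator)))
}
```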
parent 0963c650
```diff
@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
+
+	// On windows we bundle the nvidia library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
 
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.Major = int(memInfo.major)
 			gpuInfo.Minor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
+			gpuInfo.DependencyPath = depPath
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
 
```
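A small sketch of the directory relationship the first hunk relies on. The path below is hypothetical; the real value comes from `envconfig.RunnersDir`, and on Windows it would be a backslash path:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Hypothetical install layout for illustration.
	runnersDir := "/opt/ollama/ollama_runners"

	// One level up from the runner dir is where the bundled nvidia
	// library is expected to live, per the comment in the diff.
	depPath := filepath.Dir(runnersDir)
	fmt.Println(depPath) // /opt/ollama
}
```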
```diff
@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	if runtime.GOOS == "windows" {
 		pathEnv = "PATH"
 	}
-	// append the server directory to LD_LIBRARY_PATH/PATH
+	// prepend the server directory to LD_LIBRARY_PATH/PATH
 	libraryPaths := []string{dir}
 
 	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 		// Append our runner directory to the path
 		// This will favor system libraries over our bundled library dependencies
-		libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 	}
 
 	// Note: we always put the dependency path first
```
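The flipped `append` is the heart of this hunk: both orderings contain the same entries, but with opposite precedence. A standalone sketch (hypothetical paths) comparing the two:

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	dir := "/opt/ollama/runners/cuda_v11" // hypothetical runner dir
	existing := "/usr/lib" + string(filepath.ListSeparator) + "/opt/cuda/lib64"

	// Old ordering: existing entries first, so system libraries win.
	oldOrder := append(filepath.SplitList(existing), dir)

	// New ordering: runner dir first, so the bundled libraries win.
	newOrder := append([]string{dir}, filepath.SplitList(existing)...)

	sep := string(filepath.ListSeparator)
	fmt.Println("old:", strings.Join(oldOrder, sep))
	fmt.Println("new:", strings.Join(newOrder, sep))
}
```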
```diff
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		sem: semaphore.NewWeighted(int64(numParallel)),
 	}
 
-	libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-	s.cmd.Env = append(os.Environ(), libEnv)
+	s.cmd.Env = os.Environ()
 	s.cmd.Stdout = os.Stdout
 	s.cmd.Stderr = s.status
 
-	// TODO - multiple GPU selection logic...
-	key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-	if key != "" {
-		s.cmd.Env = append(s.cmd.Env, key+"="+val)
+	visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		}
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
 	}
 
 	slog.Info("starting llama server", "cmd", s.cmd.String())
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}
```
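The update-or-add loop in the last large hunk handles a Windows quirk: environment variable names are case-insensitive there, so the inherited environment may already contain `Path` rather than `PATH`, and blindly appending would leave two conflicting entries. A self-contained sketch of the same pattern; the `setEnvVar` helper is hypothetical, not part of the commit:

```go
package main

import (
	"fmt"
	"strings"
)

// setEnvVar updates an existing NAME=value entry case-insensitively,
// or appends one if no entry matches, mirroring the loop in the diff.
func setEnvVar(env []string, name, val string) []string {
	for i := range env {
		kv := strings.SplitN(env[i], "=", 2)
		if strings.EqualFold(kv[0], name) {
			env[i] = name + "=" + val // overwrite in place, don't duplicate
			return env
		}
	}
	return append(env, name+"="+val)
}

func main() {
	env := []string{"Path=C:\\Windows", "CUDA_VISIBLE_DEVICES=1"}
	env = setEnvVar(env, "PATH", "C:\\Program Files\\Ollama")
	env = setEnvVar(env, "CUDA_VISIBLE_DEVICES", "0")
	fmt.Println(env) // [PATH=C:\Program Files\Ollama CUDA_VISIBLE_DEVICES=0]
}
```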