OpenDAS / ollama · Commits

Commit b08870af (unverified)
Authored May 06, 2024 by Daniel Hiltgen; committed by GitHub on May 06, 2024

Merge pull request #4188 from dhiltgen/use_our_lib

Use our bundled libraries (cuda) instead of the host library
Parents: 3ecae420, 380378cc

Showing 2 changed files with 31 additions and 15 deletions:

gpu/gpu.go (+7, -0)
llm/server.go (+24, -15)
gpu/gpu.go

@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
+	// On windows we bundle the nvidia library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
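For context on the new depPath logic: filepath.Dir strips the final path element, so the dependency path resolves to the directory one level above the runner directory, which is where the Windows installer places the bundled NVIDIA libraries. A minimal sketch, assuming a hypothetical install layout (the real value of envconfig.RunnersDir depends on the installation):

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Hypothetical value standing in for envconfig.RunnersDir on Windows.
	runnersDir := `C:\Users\me\AppData\Local\Programs\Ollama\ollama_runners`

	// filepath.Dir drops the last path element, yielding the directory one
	// level above the runner dir, where the bundled CUDA DLLs are expected.
	depPath := filepath.Dir(runnersDir)

	// Prints C:\Users\me\AppData\Local\Programs\Ollama when run on Windows,
	// where backslash is the path separator.
	fmt.Println(depPath)
}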
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.Major = int(memInfo.major)
 			gpuInfo.Minor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
+			gpuInfo.DependencyPath = depPath
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
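Each discovered GPU now carries the dependency path alongside its compute capability, so downstream consumers can locate the bundled libraries without recomputing the path. A simplified sketch of the fields this hunk touches; the real gpu.GpuInfo struct has more fields, and the values below are arbitrary examples:

package main

import "fmt"

// GpuInfo here is a simplified stand-in for ollama's gpu.GpuInfo;
// only the fields set in this hunk are shown.
type GpuInfo struct {
	Major          int    // CUDA compute capability, major
	Minor          int    // CUDA compute capability, minor
	MinimumMemory  uint64 // minimum VRAM required to attempt GPU offload
	DependencyPath string // new in #4188: directory of bundled runtime libraries
}

func main() {
	// Example values only, for illustration.
	info := GpuInfo{Major: 8, Minor: 6, DependencyPath: `C:\Programs\Ollama`}
	fmt.Printf("compute capability %d.%d, deps at %s\n", info.Major, info.Minor, info.DependencyPath)
}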
llm/server.go

@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	if runtime.GOOS == "windows" {
 		pathEnv = "PATH"
 	}
-	// append the server directory to LD_LIBRARY_PATH/PATH
+	// prepend the server directory to LD_LIBRARY_PATH/PATH
 	libraryPaths := []string{dir}

 	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 		// Append our runner directory to the path
 		// This will favor system libraries over our bundled library dependencies
-		libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 	}

 	// Note: we always put the dependency path first
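This is the behavioral core of the PR: the runner directory now comes first in LD_LIBRARY_PATH (or PATH on Windows), so the dynamic loader finds the bundled CUDA libraries before any host copies. A minimal sketch of the ordering difference, assuming a hypothetical runner directory and a pre-existing library path (output shown for a Unix host, where the list separator is ':'):

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	dir := "/opt/ollama/runner"           // hypothetical runner directory
	hostPath := "/usr/lib:/usr/local/lib" // hypothetical pre-existing LD_LIBRARY_PATH

	// Old behavior: runner dir appended, so host libraries were found first.
	oldOrder := append(filepath.SplitList(hostPath), dir)

	// New behavior: runner dir prepended, so bundled libraries are found first.
	newOrder := append([]string{dir}, filepath.SplitList(hostPath)...)

	sep := string(filepath.ListSeparator)
	fmt.Println(strings.Join(oldOrder, sep)) // /usr/lib:/usr/local/lib:/opt/ollama/runner
	fmt.Println(strings.Join(newOrder, sep)) // /opt/ollama/runner:/usr/lib:/usr/local/lib
}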
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		sem: semaphore.NewWeighted(int64(numParallel)),
 	}

-	libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-	s.cmd.Env = append(os.Environ(), libEnv)
+	s.cmd.Env = os.Environ()
 	s.cmd.Stdout = os.Stdout
 	s.cmd.Stderr = s.status

-	// TODO - multiple GPU selection logic...
-	key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-	if key != "" {
-		s.cmd.Env = append(s.cmd.Env, key+"="+val)
-	}
+	visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		}
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
+	}

 	slog.Info("starting llama server", "cmd", s.cmd.String())
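The new loop replaces an existing variable in place rather than appending a second NAME=value entry. strings.EqualFold matters here because Windows treats environment variable names case-insensitively (Path vs PATH), and a duplicate entry could leave the host's original value in effect for the child process. The same update-or-append pattern in isolation, with setEnv as a hypothetical helper name not present in the PR:

package main

import (
	"fmt"
	"strings"
)

// setEnv mirrors the pattern in this hunk: replace an existing NAME=value
// entry, matching the name case-insensitively, instead of appending a
// duplicate; append only when no entry exists yet.
func setEnv(env []string, name, val string) []string {
	for i := range env {
		cmp := strings.SplitN(env[i], "=", 2)
		if strings.EqualFold(cmp[0], name) {
			env[i] = name + "=" + val
			return env
		}
	}
	return append(env, name+"="+val)
}

func main() {
	// "Path" (Windows casing) is matched and replaced despite the different case.
	env := []string{`Path=C:\Windows`, "CUDA_VISIBLE_DEVICES=0"}
	env = setEnv(env, "PATH", `C:\ollama;C:\Windows`)
	fmt.Println(env) // [PATH=C:\ollama;C:\Windows CUDA_VISIBLE_DEVICES=0]
}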
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}

-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}