Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
e3936d4f
Unverified
Commit
e3936d4f
authored
Nov 28, 2024
by
ItzCrazyKns
Committed by
GitHub
Nov 27, 2024
Browse files
Support Multiple LoRa Adapters (#7667)
Closes #7627
parent
940e6277
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
14 deletions
+26
-14
llama/runner/runner.go
llama/runner/runner.go
+23
-8
llm/server.go
llm/server.go
+3
-6
No files found.
llama/runner/runner.go
View file @
e3936d4f
...
...
@@ -833,10 +833,21 @@ func (s *Server) health(w http.ResponseWriter, r *http.Request) {
}
}
// multiLPath accumulates the values of a command-line flag that may be
// supplied more than once. It implements flag.Value so it can be registered
// with flag.Var; each occurrence of the flag adds one entry, in order.
type multiLPath []string

// Set appends value to the list. The flag package calls it once for every
// occurrence of the flag on the command line. It never fails and always
// returns nil.
func (m *multiLPath) Set(value string) error {
	*m = append(*m, value)
	return nil
}

// String returns the collected values joined by ", ". An empty list yields
// the empty string.
//
// A nil receiver also yields "": the flag package documents that it may call
// String on a zero-valued receiver such as a nil pointer, and dereferencing
// it unguarded would panic.
func (m *multiLPath) String() string {
	if m == nil {
		return ""
	}
	return strings.Join(*m, ", ")
}
func
(
s
*
Server
)
loadModel
(
params
llama
.
ModelParams
,
mpath
string
,
lpath
string
,
lpath
multiLPath
,
ppath
string
,
kvSize
int
,
flashAttention
bool
,
...
...
@@ -857,10 +868,12 @@ func (s *Server) loadModel(
panic
(
err
)
}
if
lpath
!=
""
{
err
:=
s
.
model
.
ApplyLoraFromFile
(
s
.
lc
,
lpath
,
1.0
,
threads
)
if
err
!=
nil
{
panic
(
err
)
if
lpath
.
String
()
!=
""
{
for
_
,
path
:=
range
lpath
{
err
:=
s
.
model
.
ApplyLoraFromFile
(
s
.
lc
,
path
,
1.0
,
threads
)
if
err
!=
nil
{
panic
(
err
)
}
}
}
...
...
@@ -890,7 +903,6 @@ func main() {
mainGpu
:=
flag
.
Int
(
"main-gpu"
,
0
,
"Main GPU"
)
flashAttention
:=
flag
.
Bool
(
"flash-attn"
,
false
,
"Enable flash attention"
)
kvSize
:=
flag
.
Int
(
"ctx-size"
,
2048
,
"Context (or KV cache) size"
)
lpath
:=
flag
.
String
(
"lora"
,
""
,
"Path to lora layer file"
)
port
:=
flag
.
Int
(
"port"
,
8080
,
"Port to expose the server on"
)
threads
:=
flag
.
Int
(
"threads"
,
runtime
.
NumCPU
(),
"Number of threads to use during generation"
)
verbose
:=
flag
.
Bool
(
"verbose"
,
false
,
"verbose output (default: disabled)"
)
...
...
@@ -900,6 +912,9 @@ func main() {
multiUserCache
:=
flag
.
Bool
(
"multiuser-cache"
,
false
,
"optimize input cache algorithm for multiple users"
)
requirements
:=
flag
.
Bool
(
"requirements"
,
false
,
"print json requirement information"
)
var
lpaths
multiLPath
flag
.
Var
(
&
lpaths
,
"lora"
,
"Path to lora layer file (can be specified multiple times)"
)
flag
.
Parse
()
if
*
requirements
{
printRequirements
(
os
.
Stdout
)
...
...
@@ -946,7 +961,7 @@ func main() {
params
:=
llama
.
ModelParams
{
NumGpuLayers
:
*
nGpuLayers
,
MainGpu
:
*
mainGpu
,
UseMmap
:
!*
noMmap
&&
*
lpath
==
""
,
UseMmap
:
!*
noMmap
&&
lpath
s
.
String
()
==
""
,
UseMlock
:
*
mlock
,
TensorSplit
:
tensorSplitFloats
,
Progress
:
func
(
progress
float32
)
{
...
...
@@ -955,7 +970,7 @@ func main() {
}
server
.
ready
.
Add
(
1
)
go
server
.
loadModel
(
params
,
*
mpath
,
*
lpath
,
*
ppath
,
*
kvSize
,
*
flashAttention
,
*
threads
,
*
multiUserCache
)
go
server
.
loadModel
(
params
,
*
mpath
,
lpath
s
,
*
ppath
,
*
kvSize
,
*
flashAttention
,
*
threads
,
*
multiUserCache
)
server
.
cond
=
sync
.
NewCond
(
&
server
.
mu
)
...
...
llm/server.go
View file @
e3936d4f
...
...
@@ -144,10 +144,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
// Loop through potential servers
finalErr
:=
errors
.
New
(
"no suitable llama servers found"
)
if
len
(
adapters
)
>
1
{
return
nil
,
errors
.
New
(
"ollama supports only one lora adapter, but multiple were provided"
)
}
rDir
,
err
:=
runners
.
Refresh
(
build
.
EmbedFS
)
if
err
!=
nil
{
return
nil
,
err
...
...
@@ -201,8 +197,9 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
}
if
len
(
adapters
)
>
0
{
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
params
=
append
(
params
,
"--lora"
,
adapters
[
0
])
for
_
,
adapter
:=
range
adapters
{
params
=
append
(
params
,
"--lora"
,
adapter
)
}
}
if
len
(
projectors
)
>
0
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment