Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
32f62fbb
"vscode:/vscode.git/clone" did not exist on "95dafc6475bc36490e213269d1028adfd4f75363"
Unverified
Commit
32f62fbb
authored
Dec 05, 2023
by
Michael Yang
Committed by
GitHub
Dec 05, 2023
Browse files
Merge pull request #1334 from jmorganca/mxyng/load-projectors
load projectors
parents
409bb967
5d75505e
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
62 additions
and
27 deletions
+62
-27
api/types.go
api/types.go
+10
-0
llm/llama.go
llm/llama.go
+6
-1
llm/llm.go
llm/llm.go
+3
-3
server/images.go
server/images.go
+32
-17
server/routes.go
server/routes.go
+11
-6
No files found.
api/types.go
View file @
32f62fbb
...
...
@@ -203,12 +203,22 @@ type GenerateResponse struct {
CreatedAt
time
.
Time
`json:"created_at"`
Response
string
`json:"response"`
ModelConfiguration
ModelConfiguration
`json:"model_configuration"`
Done
bool
`json:"done"`
Context
[]
int
`json:"context,omitempty"`
Metrics
}
type
ModelConfiguration
struct
{
ModelFormat
string
`json:"model_format"`
ModelFamily
string
`json:"model_family"`
ModelFamilies
[]
string
`json:"model_families"`
ModelType
string
`json:"model_type"`
FileType
string
`json:"file_type"`
}
func
(
m
*
Metrics
)
Summary
()
{
if
m
.
TotalDuration
>
0
{
fmt
.
Fprintf
(
os
.
Stderr
,
"total duration: %v
\n
"
,
m
.
TotalDuration
)
...
...
llm/llama.go
View file @
32f62fbb
...
...
@@ -325,7 +325,7 @@ func (w *StatusWriter) Write(b []byte) (int, error) {
return
os
.
Stderr
.
Write
(
b
)
}
func
newLlama
(
model
string
,
adapters
[]
string
,
runners
[]
ModelRunner
,
numLayers
int64
,
opts
api
.
Options
)
(
*
llama
,
error
)
{
func
newLlama
(
model
string
,
adapters
,
projectors
[]
string
,
runners
[]
ModelRunner
,
numLayers
int64
,
opts
api
.
Options
)
(
*
llama
,
error
)
{
fileInfo
,
err
:=
os
.
Stat
(
model
)
if
err
!=
nil
{
return
nil
,
err
...
...
@@ -365,6 +365,11 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
params
=
append
(
params
,
"--lora"
,
adapters
[
0
])
}
if
len
(
projectors
)
>
0
{
// TODO: applying multiple projectors is not supported by the llama.cpp server yet
params
=
append
(
params
,
"--mmproj"
,
projectors
[
0
])
}
if
opts
.
NumThread
>
0
{
params
=
append
(
params
,
"--threads"
,
fmt
.
Sprintf
(
"%d"
,
opts
.
NumThread
))
}
...
...
llm/llm.go
View file @
32f62fbb
...
...
@@ -23,7 +23,7 @@ type LLM interface {
Ping
(
context
.
Context
)
error
}
func
New
(
workDir
,
model
string
,
adapters
[]
string
,
opts
api
.
Options
)
(
LLM
,
error
)
{
func
New
(
workDir
,
model
string
,
adapters
,
projectors
[]
string
,
opts
api
.
Options
)
(
LLM
,
error
)
{
if
_
,
err
:=
os
.
Stat
(
model
);
err
!=
nil
{
return
nil
,
err
}
...
...
@@ -82,9 +82,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
opts
.
NumGQA
=
0
opts
.
RopeFrequencyBase
=
0.0
opts
.
RopeFrequencyScale
=
0.0
return
newLlama
(
model
,
adapters
,
chooseRunners
(
workDir
,
"gguf"
),
ggml
.
NumLayers
(),
opts
)
return
newLlama
(
model
,
adapters
,
projectors
,
chooseRunners
(
workDir
,
"gguf"
),
ggml
.
NumLayers
(),
opts
)
case
"ggml"
,
"ggmf"
,
"ggjt"
,
"ggla"
:
return
newLlama
(
model
,
adapters
,
chooseRunners
(
workDir
,
"ggml"
),
ggml
.
NumLayers
(),
opts
)
return
newLlama
(
model
,
adapters
,
projectors
,
chooseRunners
(
workDir
,
"ggml"
),
ggml
.
NumLayers
(),
opts
)
default
:
return
nil
,
fmt
.
Errorf
(
"unknown ggml type: %s"
,
ggml
.
ModelFamily
())
}
...
...
server/images.go
View file @
32f62fbb
...
...
@@ -35,16 +35,18 @@ type RegistryOptions struct {
}
type
Model
struct
{
Name
string
`json:"name"`
ShortName
string
ModelPath
string
OriginalModel
string
AdapterPaths
[]
string
Template
string
System
string
License
[]
string
Digest
string
Options
map
[
string
]
interface
{}
Name
string
`json:"name"`
Config
ConfigV2
ShortName
string
ModelPath
string
OriginalModel
string
AdapterPaths
[]
string
ProjectorPaths
[]
string
Template
string
System
string
License
[]
string
Digest
string
Options
map
[
string
]
interface
{}
}
type
PromptVars
struct
{
...
...
@@ -136,16 +138,12 @@ type ManifestV2 struct {
}
type
ConfigV2
struct
{
ModelFormat
string
`json:"model_format"`
ModelFamily
string
`json:"model_family"`
ModelFamilies
[]
string
`json:"model_families"`
ModelType
string
`json:"model_type"`
FileType
string
`json:"file_type"`
RootFS
RootFS
`json:"rootfs"`
// required by spec
Architecture
string
`json:"architecture"`
OS
string
`json:"os"`
RootFS
RootFS
`json:"rootfs"`
api
.
ModelConfiguration
}
func
(
c
*
ConfigV2
)
SetModelFormat
(
format
string
)
{
...
...
@@ -234,6 +232,21 @@ func GetModel(name string) (*Model, error) {
License
:
[]
string
{},
}
filename
,
err
:=
GetBlobsPath
(
manifest
.
Config
.
Digest
)
if
err
!=
nil
{
return
nil
,
err
}
configFile
,
err
:=
os
.
Open
(
filename
)
if
err
!=
nil
{
return
nil
,
err
}
defer
configFile
.
Close
()
if
err
:=
json
.
NewDecoder
(
configFile
)
.
Decode
(
&
model
.
Config
);
err
!=
nil
{
return
nil
,
err
}
for
_
,
layer
:=
range
manifest
.
Layers
{
filename
,
err
:=
GetBlobsPath
(
layer
.
Digest
)
if
err
!=
nil
{
...
...
@@ -250,6 +263,8 @@ func GetModel(name string) (*Model, error) {
log
.
Print
(
"WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored."
)
case
"application/vnd.ollama.image.adapter"
:
model
.
AdapterPaths
=
append
(
model
.
AdapterPaths
,
filename
)
case
"application/vnd.ollama.image.projector"
:
model
.
ProjectorPaths
=
append
(
model
.
ProjectorPaths
,
filename
)
case
"application/vnd.ollama.image.template"
:
bts
,
err
:=
os
.
ReadFile
(
filename
)
if
err
!=
nil
{
...
...
server/routes.go
View file @
32f62fbb
...
...
@@ -105,7 +105,7 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
loaded
.
Options
=
nil
}
llmRunner
,
err
:=
llm
.
New
(
workDir
,
model
.
ModelPath
,
model
.
AdapterPaths
,
opts
)
llmRunner
,
err
:=
llm
.
New
(
workDir
,
model
.
ModelPath
,
model
.
AdapterPaths
,
model
.
ProjectorPaths
,
opts
)
if
err
!=
nil
{
// some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to
...
...
@@ -198,7 +198,11 @@ func GenerateHandler(c *gin.Context) {
// an empty request loads the model
if
req
.
Prompt
==
""
&&
req
.
Template
==
""
&&
req
.
System
==
""
{
c
.
JSON
(
http
.
StatusOK
,
api
.
GenerateResponse
{
CreatedAt
:
time
.
Now
()
.
UTC
(),
Model
:
req
.
Model
,
Done
:
true
})
c
.
JSON
(
http
.
StatusOK
,
api
.
GenerateResponse
{
CreatedAt
:
time
.
Now
()
.
UTC
(),
Model
:
req
.
Model
,
ModelConfiguration
:
model
.
Config
.
ModelConfiguration
,
Done
:
true
})
return
}
...
...
@@ -257,10 +261,11 @@ func GenerateHandler(c *gin.Context) {
}
resp
:=
api
.
GenerateResponse
{
Model
:
r
.
Model
,
CreatedAt
:
r
.
CreatedAt
,
Done
:
r
.
Done
,
Response
:
r
.
Content
,
Model
:
r
.
Model
,
ModelConfiguration
:
model
.
Config
.
ModelConfiguration
,
CreatedAt
:
r
.
CreatedAt
,
Done
:
r
.
Done
,
Response
:
r
.
Content
,
Metrics
:
api
.
Metrics
{
TotalDuration
:
r
.
TotalDuration
,
LoadDuration
:
r
.
LoadDuration
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment