OpenDAS / ollama: Commit 35af37a2

session id

Authored Jul 18, 2023 by Michael Yang
Parent: dbb3174c
Showing 4 changed files with 67 additions and 36 deletions:

    api/types.go        +5   -3
    cmd/cmd.go          +16  -8
    llama/llama.go      +9   -9
    server/routes.go    +37  -16
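Taken together, the four changes thread a session identifier through the whole stack: the API types gain a SessionID field on both request and response, the CLI stashes the ID (alongside the context tokens) in the cobra command's context between prompts, the llama binding's struct is exported so other packages can hold a loaded model, and the server keeps a single active session, reloading the model only when an incoming SessionID does not match the current one.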
api/types.go

@@ -28,9 +28,10 @@ func (e StatusError) Error() string {
 }
 
 type GenerateRequest struct {
-	Model   string `json:"model"`
-	Prompt  string `json:"prompt"`
-	Context []int  `json:"context,omitempty"`
+	SessionID int64  `json:"session_id"`
+	Model     string `json:"model"`
+	Prompt    string `json:"prompt"`
+	Context   []int  `json:"context,omitempty"`
 
 	Options `json:"options"`
 }

@@ -81,6 +82,7 @@ type ListResponseModel struct {
 }
 
 type GenerateResponse struct {
+	SessionID int64     `json:"session_id"`
 	Model     string    `json:"model"`
 	CreatedAt time.Time `json:"created_at"`
 	Response  string    `json:"response,omitempty"`
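Only the two struct definitions change here; on the wire, every generate call now carries a session_id in both directions. A minimal client-side sketch of what this serializes to, assuming only the fields and tags shown in the diff above (the model name is a placeholder, and Options is omitted for brevity):

package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the GenerateRequest fields from the diff above; this file is a
// standalone sketch, not the api package itself.
type GenerateRequest struct {
	SessionID int64  `json:"session_id"`
	Model     string `json:"model"`
	Prompt    string `json:"prompt"`
	Context   []int  `json:"context,omitempty"`
}

func main() {
	// A first request leaves SessionID at its zero value; per the server
	// change below, 0 never matches an active session, so the model is
	// loaded fresh and a new ID comes back in the response.
	b, _ := json.Marshal(GenerateRequest{Model: "some-model", Prompt: "hi"})
	fmt.Println(string(b))
	// {"session_id":0,"model":"some-model","prompt":"hi"}
}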
cmd/cmd.go

@@ -244,7 +244,7 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
 	return generateBatch(cmd, args[0])
 }
 
-var generateContextKey struct{}
+type generateContextKey string
 
 func generate(cmd *cobra.Command, model, prompt string) error {
 	if len(strings.TrimSpace(prompt)) > 0 {

@@ -255,22 +255,25 @@ func generate(cmd *cobra.Command, model, prompt string) error {
 		var latest api.GenerateResponse
 
-		generateContext, ok := cmd.Context().Value(generateContextKey).([]int)
+		generateContext, ok := cmd.Context().Value(generateContextKey("context")).([]int)
 		if !ok {
 			generateContext = []int{}
 		}
 
-		request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext}
-		fn := func(resp api.GenerateResponse) error {
+		generateSession, ok := cmd.Context().Value(generateContextKey("session")).(int64)
+		if !ok {
+			generateSession = 0
+		}
+
+		request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, SessionID: generateSession}
+		fn := func(response api.GenerateResponse) error {
 			if !spinner.IsFinished() {
 				spinner.Finish()
 			}
 
-			latest = resp
-			fmt.Print(resp.Response)
-
-			cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey, resp.Context))
+			latest = response
+			fmt.Print(response.Response)
 			return nil
 		}

@@ -289,6 +292,11 @@ func generate(cmd *cobra.Command, model, prompt string) error {
 		if verbose {
 			latest.Summary()
 		}
+
+		ctx := cmd.Context()
+		ctx = context.WithValue(ctx, generateContextKey("context"), latest.Context)
+		ctx = context.WithValue(ctx, generateContextKey("session"), latest.SessionID)
+		cmd.SetContext(ctx)
 	}
 
 	return nil
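The switch from var generateContextKey struct{} to type generateContextKey string matters because the CLI now has two values to carry between prompts ("context" and "session"); a single struct{} value can only name one key, and since all struct{} values compare equal it could also collide with a struct{}-typed key from another package. A self-contained sketch of the pattern, independent of the ollama code:

package main

import (
	"context"
	"fmt"
)

// Same shape as the key type the CLI adopts: a defined string type, so
// generateContextKey("context") and generateContextKey("session") are
// distinct keys that cannot collide with a plain string key.
type generateContextKey string

func main() {
	ctx := context.Background()
	ctx = context.WithValue(ctx, generateContextKey("context"), []int{1, 2, 3})
	ctx = context.WithValue(ctx, generateContextKey("session"), int64(42))

	// The comma-ok assertion mirrors generate(): a missing or mistyped
	// value gives ok == false instead of panicking.
	if session, ok := ctx.Value(generateContextKey("session")).(int64); ok {
		fmt.Println("session:", session)
	}
	if tokens, ok := ctx.Value(generateContextKey("context")).([]int); ok {
		fmt.Println("tokens:", tokens)
	}
}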
llama/llama.go

@@ -91,7 +91,7 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
-type llama struct {
+type LLM struct {
 	params *C.struct_llama_context_params
 	model  *C.struct_llama_model
 	ctx    *C.struct_llama_context

@@ -99,12 +99,12 @@ type llama struct {
 	api.Options
 }
 
-func New(model string, opts api.Options) (*llama, error) {
+func New(model string, opts api.Options) (*LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
 
-	llm := llama{Options: opts}
+	llm := LLM{Options: opts}
 
 	C.llama_backend_init(C.bool(llm.UseNUMA))

@@ -144,14 +144,14 @@ func New(model string, opts api.Options) (*llama, error) {
 	return &llm, nil
 }
 
-func (llm *llama) Close() {
+func (llm *LLM) Close() {
 	defer C.llama_free_model(llm.model)
 	defer C.llama_free(llm.ctx)
 
 	C.llama_print_timings(llm.ctx)
 }
 
-func (llm *llama) Predict(ctx []int, prompt string, fn func(api.GenerateResponse)) error {
+func (llm *LLM) Predict(ctx []int, prompt string, fn func(api.GenerateResponse)) error {
 	if input := llm.tokenize(prompt); input != nil {
 		embd := make([]C.llama_token, len(ctx))
 		for i := range ctx {

@@ -164,7 +164,7 @@ func (llm *llama) Predict(ctx []int, prompt string, fn func(api.GenerateResponse
 	return errors.New("llama: tokenize")
 }
 
-func (llm *llama) tokenize(prompt string) []C.llama_token {
+func (llm *LLM) tokenize(prompt string) []C.llama_token {
 	cPrompt := C.CString(prompt)
 	defer C.free(unsafe.Pointer(cPrompt))

@@ -176,7 +176,7 @@ func (llm *llama) tokenize(prompt string) []C.llama_token {
 	return nil
 }
 
-func (llm *llama) detokenize(tokens ...C.llama_token) string {
+func (llm *LLM) detokenize(tokens ...C.llama_token) string {
 	var sb strings.Builder
 	for _, token := range tokens {
 		sb.WriteString(C.GoString(C.llama_token_to_str(llm.ctx, token)))

@@ -185,7 +185,7 @@ func (llm *llama) detokenize(tokens ...C.llama_token) string {
 	return sb.String()
 }
 
-func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse)) error {
+func (llm *LLM) generate(input []C.llama_token, fn func(api.GenerateResponse)) error {
 	var opts C.struct_llama_sample_options
 	opts.repeat_penalty = C.float(llm.RepeatPenalty)
 	opts.frequency_penalty = C.float(llm.FrequencyPenalty)

@@ -256,7 +256,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
 	return nil
 }
 
-func (llm *llama) sample(output deque[C.llama_token], opts *C.struct_llama_sample_options) (C.llama_token, error) {
+func (llm *LLM) sample(output deque[C.llama_token], opts *C.struct_llama_sample_options) (C.llama_token, error) {
 	numVocab := int(C.llama_n_vocab(llm.ctx))
 	logits := unsafe.Slice(C.llama_get_logits(llm.ctx), numVocab)
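The substance of this file's change is the mechanical rename from the unexported llama to the exported LLM. In Go, only identifiers starting with an upper-case letter are visible outside their package, so this rename is what allows server/routes.go (below) to embed *llama.LLM in its activeSession struct and keep a loaded model alive across requests.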
server/routes.go

@@ -11,6 +11,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync"
 	"time"
 
 	"dario.cat/mergo"

@@ -21,7 +22,17 @@ import (
 	"github.com/jmorganca/ollama/llama"
 )
 
+var mu sync.Mutex
+
+var activeSession struct {
+	ID int64
+	*llama.LLM
+}
+
 func GenerateHandler(c *gin.Context) {
+	mu.Lock()
+	defer mu.Unlock()
+
 	start := time.Now()
 
 	var req api.GenerateRequest

@@ -36,29 +47,38 @@ func GenerateHandler(c *gin.Context) {
 		return
 	}
 
-	opts := api.DefaultOptions()
-	if err := mergo.Merge(&opts, model.Options, mergo.WithOverride); err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
+	if req.SessionID == 0 || req.SessionID != activeSession.ID {
+		if activeSession.LLM != nil {
+			activeSession.Close()
+			activeSession.LLM = nil
+		}
 
-	if err := mergo.Merge(&opts, req.Options, mergo.WithOverride); err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
+		opts := api.DefaultOptions()
+		if err := mergo.Merge(&opts, model.Options, mergo.WithOverride); err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
 
-	prompt, err := model.Prompt(req)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
+		if err := mergo.Merge(&opts, req.Options, mergo.WithOverride); err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
 
-	llm, err := llama.New(model.ModelPath, opts)
+		llm, err := llama.New(model.ModelPath, opts)
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
+
+		activeSession.ID = time.Now().UnixNano()
+		activeSession.LLM = llm
+	}
+
+	prompt, err := model.Prompt(req)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
-	defer llm.Close()
 
 	ch := make(chan any)
 	go func() {

@@ -66,6 +86,7 @@ func GenerateHandler(c *gin.Context) {
 		fn := func(r api.GenerateResponse) {
 			r.Model = req.Model
 			r.CreatedAt = time.Now().UTC()
+			r.SessionID = activeSession.ID
 			if r.Done {
 				r.TotalDuration = time.Since(start)
 			}

@@ -73,7 +94,7 @@ func GenerateHandler(c *gin.Context) {
 			ch <- r
 		}
 
-		if err := llm.Predict(req.Context, prompt, fn); err != nil {
+		if err := activeSession.LLM.Predict(req.Context, prompt, fn); err != nil {
			ch <- gin.H{"error": err.Error()}
 		}
 	}()
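The handler now serializes all generate calls behind a mutex and keeps at most one model in memory. Below is a condensed, runnable sketch of just that gating logic; loadModel is a hypothetical stand-in for the option merging plus llama.New in the real handler, and LLM here is a local stub rather than the llama package's type:

package main

import (
	"fmt"
	"sync"
	"time"
)

type LLM struct{ path string }

func (l *LLM) Close() { fmt.Println("closed", l.path) }

// Hypothetical stand-in for mergo.Merge + llama.New in GenerateHandler.
func loadModel(path string) *LLM { return &LLM{path: path} }

var mu sync.Mutex

var activeSession struct {
	ID int64
	*LLM
}

// handle reuses the loaded model only when the caller echoes back the
// current session ID; anything else (including 0) tears down the old
// session and loads a fresh one, as the diff above does.
func handle(sessionID int64, path string) int64 {
	mu.Lock()
	defer mu.Unlock()

	if sessionID == 0 || sessionID != activeSession.ID {
		if activeSession.LLM != nil {
			activeSession.Close()
			activeSession.LLM = nil
		}
		activeSession.ID = time.Now().UnixNano()
		activeSession.LLM = loadModel(path)
	}
	return activeSession.ID
}

func main() {
	id := handle(0, "model.bin")   // first call: loads the model
	id2 := handle(id, "model.bin") // same session: reuses it
	fmt.Println(id == id2)         // true
	handle(0, "model.bin")         // session 0: closes and reloads
}

Because the ID comes from time.Now().UnixNano(), a client that echoes back the session_id from a previous response keeps hitting the warm model; any other value causes a teardown and reload. Note that one consequence of this design is that the server no longer closes the model when the request finishes (the old defer llm.Close() is gone); the model stays resident until a mismatched session evicts it.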