OpenDAS / ollama · Commit 8dc68417 (unverified)
Authored Sep 05, 2023 by Michael Yang; committed via GitHub on Sep 05, 2023

Merge pull request #463 from jmorganca/mxyng/fix-last-token

fix not forwarding last token

Parents: 5d3f314b 59a70552
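What the merge fixes: in the old Predict loop (see the last two hunks below), llama.cpp's final stream event — the one carrying the stop flag and timings — had its Content appended to the running context but was never forwarded through the fn callback, so the caller never received the last generated token. The new code decodes every event into a single Prediction value, forwards its Content first, and only then checks the stop flag. A minimal, self-contained sketch of that control flow (the event payloads here are made up; this is not the ollama code itself):

package main

import (
    "encoding/json"
    "fmt"
)

// prediction mirrors only the two fields the sketch needs; the real
// struct in the diff below carries more (model, prompt, timings).
type prediction struct {
    Content string `json:"content"`
    Stop    bool   `json:"stop"`
}

func main() {
    // Hypothetical stream: the final event carries content AND stop=true.
    events := []string{
        `{"content":"Hello","stop":false}`,
        `{"content":" world","stop":false}`,
        `{"content":"!","stop":true}`,
    }

    var out string
    for _, evt := range events {
        var p prediction
        if err := json.Unmarshal([]byte(evt), &p); err != nil {
            panic(err)
        }

        out += p.Content // forward the token first ...
        if p.Stop {      // ... then check the stop flag
            break
        }
    }

    fmt.Println(out) // "Hello world!" — the trailing "!" is kept
}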
Showing 1 changed file with 23 additions and 45 deletions.

llm/ggml_llama.go (+23, -45) · View file @ 8dc68417
@@ -353,11 +353,6 @@ func (llm *llama) SetOptions(opts api.Options) {
     llm.Options = opts
 }
 
-type Prediction struct {
-    Content string `json:"content"`
-    Stop    bool   `json:"stop"`
-}
-
 type GenerationSettings struct {
     FrequencyPenalty float64 `json:"frequency_penalty"`
     IgnoreEOS        bool    `json:"ignore_eos"`
@@ -385,31 +380,19 @@ type GenerationSettings struct {
 }
 
 type Timings struct {
-    PredictedMS         float64 `json:"predicted_ms"`
-    PredictedN          int     `json:"predicted_n"`
-    PredictedPerSecond  float64 `json:"predicted_per_second"`
-    PredictedPerTokenMS float64 `json:"predicted_per_token_ms"`
-    PromptMS            float64 `json:"prompt_ms"`
-    PromptN             int     `json:"prompt_n"`
-    PromptPerSecond     float64 `json:"prompt_per_second"`
-    PromptPerTokenMS    float64 `json:"prompt_per_token_ms"`
+    PredictedN  int     `json:"predicted_n"`
+    PredictedMS float64 `json:"predicted_ms"`
+    PromptN     int     `json:"prompt_n"`
+    PromptMS    float64 `json:"prompt_ms"`
 }
 
-type PredictComplete struct {
-    Content            string             `json:"content"`
-    GenerationSettings GenerationSettings `json:"generation_settings"`
-    Model              string             `json:"model"`
-    Prompt             string             `json:"prompt"`
-    Stop               bool               `json:"stop"`
-    StoppedEOS         bool               `json:"stopped_eos"`
-    StoppedLimit       bool               `json:"stopped_limit"`
-    StoppedWord        bool               `json:"stopped_word"`
-    StoppingWord       string             `json:"stopping_word"`
-    Timings            Timings            `json:"timings"`
-    TokensCached       int                `json:"tokens_cached"`
-    TokensEvaluated    int                `json:"tokens_evaluated"`
-    TokensPredicted    int                `json:"tokens_predicted"`
-    Truncated          bool               `json:"truncated"`
+type Prediction struct {
+    Content string `json:"content"`
+    Model   string `json:"model"`
+    Prompt  string `json:"prompt"`
+    Stop    bool   `json:"stop"`
+
+    Timings `json:"timings"`
 }
 
 type PredictRequest struct {
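One detail of the new Prediction type above: Timings is embedded with a `json:"timings"` tag, so the wire format stays nested while Go's field promotion lets later code write p.PredictedN rather than p.Timings.PredictedN — which is exactly what the Predict hunks below do. A standalone sketch of that behavior, with a made-up payload:

package main

import (
    "encoding/json"
    "fmt"
)

type Timings struct {
    PredictedN  int     `json:"predicted_n"`
    PredictedMS float64 `json:"predicted_ms"`
}

type Prediction struct {
    Content string `json:"content"`
    Stop    bool   `json:"stop"`

    // Embedded with a tag: encodes/decodes as a nested "timings" object,
    // but its fields are promoted onto Prediction for Go access.
    Timings `json:"timings"`
}

func main() {
    evt := `{"content":"hi","stop":true,"timings":{"predicted_n":2,"predicted_ms":40.5}}`

    var p Prediction
    if err := json.Unmarshal([]byte(evt), &p); err != nil {
        panic(err)
    }

    fmt.Println(p.PredictedN, p.PredictedMS) // 2 40.5 — no p.Timings needed
}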
@@ -509,13 +492,15 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
             // Read data from the server-side event stream
             if strings.HasPrefix(line, "data: ") {
                 evt := line[6:]
-                var complete PredictComplete
-                if err := json.Unmarshal([]byte(evt), &complete); err != nil {
-                    return fmt.Errorf("error unmarshaling llm complete response: %v", err)
+                var p Prediction
+                if err := json.Unmarshal([]byte(evt), &p); err != nil {
+                    return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
                 }
 
-                if complete.Timings.PredictedMS > 0 {
-                    nextContext.WriteString(complete.Content)
+                fn(api.GenerateResponse{Response: p.Content})
+                nextContext.WriteString(p.Content)
+
+                if p.Stop {
                     embd, err := llm.Encode(ctx, nextContext.String())
                     if err != nil {
                         return fmt.Errorf("encoding context: %v", err)
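For orientation, the loop in this hunk consumes llama.cpp's server-side event stream line by line; `evt := line[6:]` simply strips the 6-byte "data: " prefix before unmarshaling. A minimal sketch of that framing, with fabricated payloads:

package main

import (
    "bufio"
    "fmt"
    "strings"
)

func main() {
    // Two fabricated SSE lines in the "data: <json>" format the code expects.
    stream := "data: {\"content\":\"Hello\",\"stop\":false}\n" +
        "data: {\"content\":\"!\",\"stop\":true}\n"

    scanner := bufio.NewScanner(strings.NewReader(stream))
    for scanner.Scan() {
        line := scanner.Text()
        if strings.HasPrefix(line, "data: ") {
            evt := line[6:] // drop the "data: " prefix, keep the JSON payload
            fmt.Println(evt)
        }
    }
}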
@@ -524,21 +509,14 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
                     fn(api.GenerateResponse{
                         Done:               true,
                         Context:            embd,
-                        PromptEvalCount:    int(complete.Timings.PromptN),
-                        PromptEvalDuration: parseDurationMs(float64(complete.Timings.PromptMS)),
-                        EvalCount:          int(complete.Timings.PredictedN),
-                        EvalDuration:       parseDurationMs(float64(complete.Timings.PredictedMS)),
+                        PromptEvalCount:    p.PromptN,
+                        PromptEvalDuration: parseDurationMs(p.PromptMS),
+                        EvalCount:          p.PredictedN,
+                        EvalDuration:       parseDurationMs(p.PredictedMS),
                     })
 
                     return nil
                 }
-
-                var p Prediction
-                if err := json.Unmarshal([]byte(evt), &p); err != nil {
-                    return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
-                }
-                fn(api.GenerateResponse{Response: p.Content})
-                nextContext.WriteString(p.Content)
             }
         }
     }
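A side effect of the struct change shows up in this last hunk: the old code wrapped the Timings fields in int(...) and float64(...) conversions; with the promoted int/float64 fields those wrappers are redundant and are dropped. parseDurationMs itself is defined elsewhere in ggml_llama.go and is untouched by this diff; a plausible sketch, assuming it converts llama.cpp's millisecond floats into a time.Duration:

package main

import (
    "fmt"
    "time"
)

// Hypothetical reconstruction of the helper used above; the real
// implementation in ggml_llama.go may differ.
func parseDurationMs(ms float64) time.Duration {
    return time.Duration(ms * float64(time.Millisecond))
}

func main() {
    fmt.Println(parseDurationMs(1234.5)) // 1.2345s
}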