Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
8dc68417
Unverified
Commit 8dc68417 authored Sep 05, 2023 by Michael Yang
Committed by GitHub Sep 05, 2023
Browse files
Merge pull request #463 from jmorganca/mxyng/fix-last-token
fix not forwarding last token
parents
5d3f314b
59a70552
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 23 additions and 45 deletions
+23
-45
llm/ggml_llama.go
llm/ggml_llama.go
+23
-45
No files found.
llm/ggml_llama.go
View file @
8dc68417
...
...
@@ -353,11 +353,6 @@ func (llm *llama) SetOptions(opts api.Options) {
llm
.
Options
=
opts
}
type
Prediction
struct
{
Content
string
`json:"content"`
Stop
bool
`json:"stop"`
}
type
GenerationSettings
struct
{
FrequencyPenalty
float64
`json:"frequency_penalty"`
IgnoreEOS
bool
`json:"ignore_eos"`
...
...
@@ -385,31 +380,19 @@ type GenerationSettings struct {
}
type
Timings
struct
{
PredictedMS
float64
`json:"predicted_ms"`
PredictedN
int
`json:"predicted_n"`
PredictedPerSecond
float64
`json:"predicted_per_second"`
PredictedPerTokenMS
float64
`json:"predicted_per_token_ms"`
PromptMS
float64
`json:"prompt_ms"`
PromptN
int
`json:"prompt_n"`
PromptPerSecond
float64
`json:"prompt_per_second"`
PromptPerTokenMS
float64
`json:"prompt_per_token_ms"`
PredictedN
int
`json:"predicted_n"`
PredictedMS
float64
`json:"predicted_ms"`
PromptN
int
`json:"prompt_n"`
PromptMS
float64
`json:"prompt_ms"`
}
type
PredictComplete
struct
{
Content
string
`json:"content"`
GenerationSettings
GenerationSettings
`json:"generation_settings"`
Model
string
`json:"model"`
Prompt
string
`json:"prompt"`
Stop
bool
`json:"stop"`
StoppedEOS
bool
`json:"stopped_eos"`
StoppedLimit
bool
`json:"stopped_limit"`
StoppedWord
bool
`json:"stopped_word"`
StoppingWord
string
`json:"stopping_word"`
Timings
Timings
`json:"timings"`
TokensCached
int
`json:"tokens_cached"`
TokensEvaluated
int
`json:"tokens_evaluated"`
TokensPredicted
int
`json:"tokens_predicted"`
Truncated
bool
`json:"truncated"`
type
Prediction
struct
{
Content
string
`json:"content"`
Model
string
`json:"model"`
Prompt
string
`json:"prompt"`
Stop
bool
`json:"stop"`
Timings
`json:"timings"`
}
type
PredictRequest
struct
{
...
...
@@ -509,13 +492,15 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
// Read data from the server-side event stream
if
strings
.
HasPrefix
(
line
,
"data: "
)
{
evt
:=
line
[
6
:
]
var
complete
PredictComplete
if
err
:=
json
.
Unmarshal
([]
byte
(
evt
),
&
complete
);
err
!=
nil
{
return
fmt
.
Errorf
(
"error unmarshaling llm
complete
response: %v"
,
err
)
var
p
Prediction
if
err
:=
json
.
Unmarshal
([]
byte
(
evt
),
&
p
);
err
!=
nil
{
return
fmt
.
Errorf
(
"error unmarshaling llm
prediction
response: %v"
,
err
)
}
if
complete
.
Timings
.
PredictedMS
>
0
{
nextContext
.
WriteString
(
complete
.
Content
)
fn
(
api
.
GenerateResponse
{
Response
:
p
.
Content
})
nextContext
.
WriteString
(
p
.
Content
)
if
p
.
Stop
{
embd
,
err
:=
llm
.
Encode
(
ctx
,
nextContext
.
String
())
if
err
!=
nil
{
return
fmt
.
Errorf
(
"encoding context: %v"
,
err
)
...
...
@@ -524,21 +509,14 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
fn
(
api
.
GenerateResponse
{
Done
:
true
,
Context
:
embd
,
PromptEvalCount
:
int
(
complete
.
Timings
.
PromptN
)
,
PromptEvalDuration
:
parseDurationMs
(
float64
(
complete
.
Timings
.
PromptMS
)
)
,
EvalCount
:
int
(
complete
.
Timings
.
PredictedN
)
,
EvalDuration
:
parseDurationMs
(
float64
(
complete
.
Timings
.
PredictedMS
)
)
,
PromptEvalCount
:
p
.
PromptN
,
PromptEvalDuration
:
parseDurationMs
(
p
.
PromptMS
),
EvalCount
:
p
.
PredictedN
,
EvalDuration
:
parseDurationMs
(
p
.
PredictedMS
),
})
return
nil
}
var
p
Prediction
if
err
:=
json
.
Unmarshal
([]
byte
(
evt
),
&
p
);
err
!=
nil
{
return
fmt
.
Errorf
(
"error unmarshaling llm prediction response: %v"
,
err
)
return
nil
}
fn
(
api
.
GenerateResponse
{
Response
:
p
.
Content
})
nextContext
.
WriteString
(
p
.
Content
)
}
}
}
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.