Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
ead259d8
Unverified
Commit
ead259d8
authored
Jun 11, 2024
by
Jeffrey Morgan
Committed by
GitHub
Jun 11, 2024
Browse files
llm: fix seed value not being applied to requests (#4986)
parent
2ff45d57
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
9 deletions
+3
-9
docs/api.md
docs/api.md
+2
-3
llm/ext_server/server.cpp
llm/ext_server/server.cpp
+1
-6
No files found.
docs/api.md
View file @
ead259d8
...
@@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{
...
@@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{
#### Request (Reproducible outputs)
#### Request (Reproducible outputs)
For reproducible outputs, set `temperature` to 0 and `seed` to a number:
For reproducible outputs, set `seed` to a number:
##### Request
##### Request
...
@@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{
...
@@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{
"model": "mistral",
"model": "mistral",
"prompt": "Why is the sky blue?",
"prompt": "Why is the sky blue?",
"options": {
"options": {
"seed": 123,
"seed": 123
"temperature": 0
}
}
}'
}'
```
```
...
...
llm/ext_server/server.cpp
View file @
ead259d8
...
@@ -359,7 +359,6 @@ struct llama_server_context
...
@@ -359,7 +359,6 @@ struct llama_server_context
// slots / clients
// slots / clients
std
::
vector
<
server_slot
>
slots
;
std
::
vector
<
server_slot
>
slots
;
json
default_generation_settings_for_props
;
llama_server_queue
queue_tasks
;
llama_server_queue
queue_tasks
;
llama_server_response
queue_results
;
llama_server_response
queue_results
;
...
@@ -483,9 +482,6 @@ struct llama_server_context
...
@@ -483,9 +482,6 @@ struct llama_server_context
slots
.
push_back
(
slot
);
slots
.
push_back
(
slot
);
}
}
default_generation_settings_for_props
=
get_formated_generation
(
slots
.
front
());
default_generation_settings_for_props
[
"seed"
]
=
-
1
;
batch
=
llama_batch_init
(
n_ctx
,
0
,
params
.
n_parallel
);
batch
=
llama_batch_init
(
n_ctx
,
0
,
params
.
n_parallel
);
}
}
...
@@ -584,7 +580,7 @@ struct llama_server_context
...
@@ -584,7 +580,7 @@ struct llama_server_context
slot
->
sparams
.
mirostat_eta
=
json_value
(
data
,
"mirostat_eta"
,
default_sparams
.
mirostat_eta
);
slot
->
sparams
.
mirostat_eta
=
json_value
(
data
,
"mirostat_eta"
,
default_sparams
.
mirostat_eta
);
slot
->
sparams
.
penalize_nl
=
json_value
(
data
,
"penalize_nl"
,
default_sparams
.
penalize_nl
);
slot
->
sparams
.
penalize_nl
=
json_value
(
data
,
"penalize_nl"
,
default_sparams
.
penalize_nl
);
slot
->
params
.
n_keep
=
json_value
(
data
,
"n_keep"
,
slot
->
params
.
n_keep
);
slot
->
params
.
n_keep
=
json_value
(
data
,
"n_keep"
,
slot
->
params
.
n_keep
);
slot
->
params
.
seed
=
json_value
(
data
,
"seed"
,
default_params
.
seed
);
slot
->
sparams
.
seed
=
json_value
(
data
,
"seed"
,
default_params
.
seed
);
slot
->
sparams
.
grammar
=
json_value
(
data
,
"grammar"
,
default_sparams
.
grammar
);
slot
->
sparams
.
grammar
=
json_value
(
data
,
"grammar"
,
default_sparams
.
grammar
);
slot
->
sparams
.
n_probs
=
json_value
(
data
,
"n_probs"
,
default_sparams
.
n_probs
);
slot
->
sparams
.
n_probs
=
json_value
(
data
,
"n_probs"
,
default_sparams
.
n_probs
);
slot
->
sparams
.
min_keep
=
json_value
(
data
,
"min_keep"
,
default_sparams
.
min_keep
);
slot
->
sparams
.
min_keep
=
json_value
(
data
,
"min_keep"
,
default_sparams
.
min_keep
);
...
@@ -811,7 +807,6 @@ struct llama_server_context
...
@@ -811,7 +807,6 @@ struct llama_server_context
llama_sampling_free
(
slot
->
ctx_sampling
);
llama_sampling_free
(
slot
->
ctx_sampling
);
}
}
slot
->
ctx_sampling
=
llama_sampling_init
(
slot
->
sparams
);
slot
->
ctx_sampling
=
llama_sampling_init
(
slot
->
sparams
);
llama_set_rng_seed
(
ctx
,
slot
->
params
.
seed
);
slot
->
command
=
LOAD_PROMPT
;
slot
->
command
=
LOAD_PROMPT
;
all_slots_are_idle
=
false
;
all_slots_are_idle
=
false
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment