Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
da74384a
Commit
da74384a
authored
Jul 06, 2023
by
Bruce MacDonald
Browse files
remove prompt cache
parent
45bf83ff
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
41 deletions
+12
-41
llama/binding/binding.cpp
llama/binding/binding.cpp
+3
-8
llama/binding/binding.h
llama/binding/binding.h
+2
-3
llama/llama.go
llama/llama.go
+4
-10
llama/options.go
llama/options.go
+3
-20
No files found.
llama/binding/binding.cpp
View file @
da74384a
...
@@ -24,7 +24,7 @@
...
@@ -24,7 +24,7 @@
#include <windows.h>
#include <windows.h>
#endif
#endif
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) ||
\
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || \
defined(_WIN32)
defined(_WIN32)
void
sigint_handler
(
int
signo
)
{
void
sigint_handler
(
int
signo
)
{
if
(
signo
==
SIGINT
)
{
if
(
signo
==
SIGINT
)
{
...
@@ -573,15 +573,13 @@ void *llama_allocate_params(
...
@@ -573,15 +573,13 @@ void *llama_allocate_params(
const
char
**
antiprompt
,
int
antiprompt_count
,
float
tfs_z
,
float
typical_p
,
const
char
**
antiprompt
,
int
antiprompt_count
,
float
tfs_z
,
float
typical_p
,
float
frequency_penalty
,
float
presence_penalty
,
int
mirostat
,
float
frequency_penalty
,
float
presence_penalty
,
int
mirostat
,
float
mirostat_eta
,
float
mirostat_tau
,
bool
penalize_nl
,
float
mirostat_eta
,
float
mirostat_tau
,
bool
penalize_nl
,
const
char
*
logit_bias
,
const
char
*
session_file
,
bool
prompt_cache_all
,
const
char
*
logit_bias
,
bool
mlock
,
bool
mmap
,
const
char
*
maingpu
,
bool
mlock
,
bool
mmap
,
const
char
*
maingpu
,
const
char
*
tensorsplit
,
const
char
*
tensorsplit
)
{
bool
prompt_cache_ro
)
{
gpt_params
*
params
=
new
gpt_params
;
gpt_params
*
params
=
new
gpt_params
;
params
->
seed
=
seed
;
params
->
seed
=
seed
;
params
->
n_threads
=
threads
;
params
->
n_threads
=
threads
;
params
->
n_predict
=
tokens
;
params
->
n_predict
=
tokens
;
params
->
repeat_last_n
=
repeat_last_n
;
params
->
repeat_last_n
=
repeat_last_n
;
params
->
prompt_cache_ro
=
prompt_cache_ro
;
params
->
top_k
=
top_k
;
params
->
top_k
=
top_k
;
params
->
top_p
=
top_p
;
params
->
top_p
=
top_p
;
params
->
memory_f16
=
memory_f16
;
params
->
memory_f16
=
memory_f16
;
...
@@ -612,9 +610,6 @@ void *llama_allocate_params(
...
@@ -612,9 +610,6 @@ void *llama_allocate_params(
}
}
}
}
params
->
prompt_cache_all
=
prompt_cache_all
;
params
->
path_prompt_cache
=
session_file
;
if
(
ignore_eos
)
{
if
(
ignore_eos
)
{
params
->
logit_bias
[
llama_token_eos
()]
=
-
INFINITY
;
params
->
logit_bias
[
llama_token_eos
()]
=
-
INFINITY
;
}
}
...
...
llama/binding/binding.h
View file @
da74384a
...
@@ -31,9 +31,8 @@ void *llama_allocate_params(
...
@@ -31,9 +31,8 @@ void *llama_allocate_params(
const
char
**
antiprompt
,
int
antiprompt_count
,
float
tfs_z
,
float
typical_p
,
const
char
**
antiprompt
,
int
antiprompt_count
,
float
tfs_z
,
float
typical_p
,
float
frequency_penalty
,
float
presence_penalty
,
int
mirostat
,
float
frequency_penalty
,
float
presence_penalty
,
int
mirostat
,
float
mirostat_eta
,
float
mirostat_tau
,
bool
penalize_nl
,
float
mirostat_eta
,
float
mirostat_tau
,
bool
penalize_nl
,
const
char
*
logit_bias
,
const
char
*
session_file
,
bool
prompt_cache_all
,
const
char
*
logit_bias
,
bool
mlock
,
bool
mmap
,
const
char
*
maingpu
,
bool
mlock
,
bool
mmap
,
const
char
*
maingpu
,
const
char
*
tensorsplit
,
const
char
*
tensorsplit
);
bool
prompt_cache_ro
);
void
llama_free_params
(
void
*
params_ptr
);
void
llama_free_params
(
void
*
params_ptr
);
...
...
llama/llama.go
View file @
da74384a
...
@@ -28,6 +28,7 @@ package llama
...
@@ -28,6 +28,7 @@ package llama
// #include "binding/binding.h"
// #include "binding/binding.h"
// #include <stdlib.h>
// #include <stdlib.h>
import
"C"
import
"C"
import
(
import
(
"fmt"
"fmt"
"strings"
"strings"
...
@@ -69,7 +70,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
...
@@ -69,7 +70,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
po
.
Tokens
=
99999999
po
.
Tokens
=
99999999
}
}
defer
C
.
free
(
unsafe
.
Pointer
(
input
))
defer
C
.
free
(
unsafe
.
Pointer
(
input
))
reverseCount
:=
len
(
po
.
StopPrompts
)
reverseCount
:=
len
(
po
.
StopPrompts
)
reversePrompt
:=
make
([]
*
C
.
char
,
reverseCount
)
reversePrompt
:=
make
([]
*
C
.
char
,
reverseCount
)
var
pass
**
C
.
char
var
pass
**
C
.
char
...
@@ -86,9 +87,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
...
@@ -86,9 +87,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
C
.
int
(
po
.
Batch
),
C
.
int
(
po
.
NKeep
),
pass
,
C
.
int
(
reverseCount
),
C
.
int
(
po
.
Batch
),
C
.
int
(
po
.
NKeep
),
pass
,
C
.
int
(
reverseCount
),
C
.
float
(
po
.
TailFreeSamplingZ
),
C
.
float
(
po
.
TypicalP
),
C
.
float
(
po
.
FrequencyPenalty
),
C
.
float
(
po
.
PresencePenalty
),
C
.
float
(
po
.
TailFreeSamplingZ
),
C
.
float
(
po
.
TypicalP
),
C
.
float
(
po
.
FrequencyPenalty
),
C
.
float
(
po
.
PresencePenalty
),
C
.
int
(
po
.
Mirostat
),
C
.
float
(
po
.
MirostatETA
),
C
.
float
(
po
.
MirostatTAU
),
C
.
bool
(
po
.
PenalizeNL
),
C
.
CString
(
po
.
LogitBias
),
C
.
int
(
po
.
Mirostat
),
C
.
float
(
po
.
MirostatETA
),
C
.
float
(
po
.
MirostatTAU
),
C
.
bool
(
po
.
PenalizeNL
),
C
.
CString
(
po
.
LogitBias
),
C
.
CString
(
po
.
PathPromptCache
),
C
.
bool
(
po
.
PromptCacheAll
),
C
.
bool
(
po
.
MLock
),
C
.
bool
(
po
.
MMap
),
C
.
bool
(
po
.
MLock
),
C
.
bool
(
po
.
MMap
),
C
.
CString
(
po
.
MainGPU
),
C
.
CString
(
po
.
TensorSplit
),
C
.
CString
(
po
.
MainGPU
),
C
.
CString
(
po
.
TensorSplit
),
C
.
bool
(
po
.
PromptCacheRO
),
)
)
defer
C
.
llama_free_params
(
params
)
defer
C
.
llama_free_params
(
params
)
...
@@ -128,9 +127,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
...
@@ -128,9 +127,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
cLogitBias
:=
C
.
CString
(
po
.
LogitBias
)
cLogitBias
:=
C
.
CString
(
po
.
LogitBias
)
defer
C
.
free
(
unsafe
.
Pointer
(
cLogitBias
))
defer
C
.
free
(
unsafe
.
Pointer
(
cLogitBias
))
cPathPromptCache
:=
C
.
CString
(
po
.
PathPromptCache
)
defer
C
.
free
(
unsafe
.
Pointer
(
cPathPromptCache
))
cMainGPU
:=
C
.
CString
(
po
.
MainGPU
)
cMainGPU
:=
C
.
CString
(
po
.
MainGPU
)
defer
C
.
free
(
unsafe
.
Pointer
(
cMainGPU
))
defer
C
.
free
(
unsafe
.
Pointer
(
cMainGPU
))
...
@@ -143,9 +139,7 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
...
@@ -143,9 +139,7 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
C
.
int
(
po
.
Batch
),
C
.
int
(
po
.
NKeep
),
pass
,
C
.
int
(
reverseCount
),
C
.
int
(
po
.
Batch
),
C
.
int
(
po
.
NKeep
),
pass
,
C
.
int
(
reverseCount
),
C
.
float
(
po
.
TailFreeSamplingZ
),
C
.
float
(
po
.
TypicalP
),
C
.
float
(
po
.
FrequencyPenalty
),
C
.
float
(
po
.
PresencePenalty
),
C
.
float
(
po
.
TailFreeSamplingZ
),
C
.
float
(
po
.
TypicalP
),
C
.
float
(
po
.
FrequencyPenalty
),
C
.
float
(
po
.
PresencePenalty
),
C
.
int
(
po
.
Mirostat
),
C
.
float
(
po
.
MirostatETA
),
C
.
float
(
po
.
MirostatTAU
),
C
.
bool
(
po
.
PenalizeNL
),
cLogitBias
,
C
.
int
(
po
.
Mirostat
),
C
.
float
(
po
.
MirostatETA
),
C
.
float
(
po
.
MirostatTAU
),
C
.
bool
(
po
.
PenalizeNL
),
cLogitBias
,
cPathPromptCache
,
C
.
bool
(
po
.
PromptCacheAll
),
C
.
bool
(
po
.
MLock
),
C
.
bool
(
po
.
MMap
),
C
.
bool
(
po
.
MLock
),
C
.
bool
(
po
.
MMap
),
cMainGPU
,
cTensorSplit
,
cMainGPU
,
cTensorSplit
,
C
.
bool
(
po
.
PromptCacheRO
),
)
)
defer
C
.
llama_free_params
(
params
)
defer
C
.
llama_free_params
(
params
)
...
...
llama/options.go
View file @
da74384a
...
@@ -57,11 +57,9 @@ type PredictOptions struct {
...
@@ -57,11 +57,9 @@ type PredictOptions struct {
LogitBias
string
LogitBias
string
TokenCallback
func
(
string
)
bool
TokenCallback
func
(
string
)
bool
PathPromptCache
string
MLock
,
MMap
bool
MLock
,
MMap
,
PromptCacheAll
bool
MainGPU
string
PromptCacheRO
bool
TensorSplit
string
MainGPU
string
TensorSplit
string
}
}
type
PredictOption
func
(
p
*
PredictOptions
)
type
PredictOption
func
(
p
*
PredictOptions
)
...
@@ -182,14 +180,6 @@ var Debug PredictOption = func(p *PredictOptions) {
...
@@ -182,14 +180,6 @@ var Debug PredictOption = func(p *PredictOptions) {
p
.
DebugMode
=
true
p
.
DebugMode
=
true
}
}
var
EnablePromptCacheAll
PredictOption
=
func
(
p
*
PredictOptions
)
{
p
.
PromptCacheAll
=
true
}
var
EnablePromptCacheRO
PredictOption
=
func
(
p
*
PredictOptions
)
{
p
.
PromptCacheRO
=
true
}
var
EnableMLock
ModelOption
=
func
(
p
*
ModelOptions
)
{
var
EnableMLock
ModelOption
=
func
(
p
*
ModelOptions
)
{
p
.
MLock
=
true
p
.
MLock
=
true
}
}
...
@@ -284,13 +274,6 @@ func SetTemperature(temp float64) PredictOption {
...
@@ -284,13 +274,6 @@ func SetTemperature(temp float64) PredictOption {
}
}
}
}
// SetPathPromptCache sets the session file to store the prompt cache.
func
SetPathPromptCache
(
f
string
)
PredictOption
{
return
func
(
p
*
PredictOptions
)
{
p
.
PathPromptCache
=
f
}
}
// SetPenalty sets the repetition penalty for text generation.
// SetPenalty sets the repetition penalty for text generation.
func
SetPenalty
(
penalty
float64
)
PredictOption
{
func
SetPenalty
(
penalty
float64
)
PredictOption
{
return
func
(
p
*
PredictOptions
)
{
return
func
(
p
*
PredictOptions
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment