OpenDAS / ollama · Commits

Commit f95a1f2b (Unverified)
feat: add trace log level (#10650)

reduce prompt log to trace level

Authored May 12, 2025 by Michael Yang; committed via GitHub on May 12, 2025
Parent: 82a9e946
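In summary: the boolean `envconfig.Debug` setting is replaced by `envconfig.LogLevel()`, which maps the `OLLAMA_DEBUG` variable onto an `slog.Level`; a new `logutil` package defines a `TRACE` level below `DEBUG` and a shared logger constructor; and verbose payloads such as full prompts and raw embedding inputs move from debug to trace. Under the mapping introduced in `envconfig/config.go` below, `OLLAMA_DEBUG=1` (or `true`) should keep the existing debug behavior, while `OLLAMA_DEBUG=2` should enable trace output.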
Showing 13 changed files with 141 additions and 114 deletions (+141 −114).
app/lifecycle/logging.go        +2  −20
discover/gpu.go                 +1  −1
envconfig/config.go             +16 −3
envconfig/config_test.go        +33 −0
llm/server.go                   +35 −24
logutil/logutil.go              +29 −0
ml/backend/ggml/ggml.go         +2  −1
model/model.go                  +2  −1
model/process_text.go           +4  −0
model/process_text_spm.go       +7  −2
runner/llamarunner/runner.go    +4  −20
runner/ollamarunner/runner.go   +4  −18
server/routes.go                +2  −24
app/lifecycle/logging.go

```diff
@@ -4,20 +4,14 @@ import (
 	"fmt"
 	"log/slog"
 	"os"
-	"path/filepath"
 	"strconv"
 	"strings"

 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/logutil"
 )

 func InitLogging() {
-	level := slog.LevelInfo
-	if envconfig.Debug() {
-		level = slog.LevelDebug
-	}
-
 	var logFile *os.File
 	var err error
 	// Detect if we're a GUI app on windows, and if not, send logs to console
@@ -33,20 +27,8 @@ func InitLogging() {
 			return
 		}
 	}
-	handler := slog.NewTextHandler(logFile, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(logFile, envconfig.LogLevel()))

 	slog.Info("ollama app started")
 }
```
discover/gpu.go

```diff
@@ -670,7 +670,7 @@ func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, e
 }

 func getVerboseState() C.uint16_t {
-	if envconfig.Debug() {
+	if envconfig.LogLevel() < slog.LevelInfo {
 		return C.uint16_t(1)
 	}
 	return C.uint16_t(0)
```
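The rewritten check relies on `slog.Level` being an ordered integer type: `INFO` is 0 and more verbose levels are negative, so both `DEBUG` (-4) and the new `TRACE` (-8) satisfy `level < slog.LevelInfo` and keep turning on the native library's verbose output. A minimal illustrative sketch (standard-library constants only; `LevelTrace` mirrors the value added in `logutil` below):

```go
package main

import (
	"fmt"
	"log/slog"
)

// Mirrors logutil.LevelTrace from this commit.
const LevelTrace slog.Level = -8

func main() {
	// slog orders levels numerically: TRACE(-8) < DEBUG(-4) < INFO(0) < WARN(4) < ERROR(8).
	fmt.Println(slog.LevelDebug < slog.LevelInfo) // true  -> verbose on
	fmt.Println(LevelTrace < slog.LevelInfo)      // true  -> verbose on
	fmt.Println(slog.LevelWarn < slog.LevelInfo)  // false -> verbose off
}
```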
envconfig/config.go

```diff
@@ -149,9 +149,22 @@ func Bool(k string) func() bool {
 	}
 }

+// LogLevel returns the log level for the application.
+// Values are 0 or false INFO (Default), 1 or true DEBUG, 2 TRACE
+func LogLevel() slog.Level {
+	level := slog.LevelInfo
+	if s := Var("OLLAMA_DEBUG"); s != "" {
+		if b, _ := strconv.ParseBool(s); b {
+			level = slog.LevelDebug
+		} else if i, _ := strconv.ParseInt(s, 10, 64); i != 0 {
+			level = slog.Level(i * -4)
+		}
+	}
+
+	return level
+}
+
 var (
-	// Debug enabled additional debug information.
-	Debug = Bool("OLLAMA_DEBUG")
 	// FlashAttention enables the experimental flash attention feature.
 	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
 	// KvCacheType is the quantization type for the K/V cache.
@@ -236,7 +249,7 @@ type EnvVar struct {
 func AsMap() map[string]EnvVar {
 	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":           {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+		"OLLAMA_DEBUG":           {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
 		"OLLAMA_KV_CACHE_TYPE":   {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
 		"OLLAMA_GPU_OVERHEAD":    {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
```
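The `i * -4` looks odd but follows directly from slog's numbering: named levels sit four units apart (`DEBUG` = -4, `INFO` = 0, `WARN` = 4, `ERROR` = 8) and lower numbers mean more verbose. Multiplying the numeric `OLLAMA_DEBUG` value by -4 therefore maps 1 → `DEBUG`, 2 → -8 (the new `TRACE`), and negative values the other way (-1 → `WARN`, -2 → `ERROR`). A small runnable sketch of the mapping:

```go
package main

import (
	"fmt"
	"log/slog"
	"strconv"
)

func main() {
	// OLLAMA_DEBUG value -> slog.Level, per the i * -4 rule above.
	for _, s := range []string{"1", "2", "-1", "-2"} {
		i, _ := strconv.ParseInt(s, 10, 64)
		fmt.Printf("OLLAMA_DEBUG=%s -> %v\n", s, slog.Level(i*-4))
	}
	// Output:
	// OLLAMA_DEBUG=1 -> DEBUG
	// OLLAMA_DEBUG=2 -> DEBUG-4 (rendered as TRACE by logutil.NewLogger)
	// OLLAMA_DEBUG=-1 -> WARN
	// OLLAMA_DEBUG=-2 -> ERROR
}
```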
envconfig/config_test.go

```diff
 package envconfig

 import (
+	"log/slog"
 	"math"
 	"testing"
 	"time"

 	"github.com/google/go-cmp/cmp"
+
+	"github.com/ollama/ollama/logutil"
 )

 func TestHost(t *testing.T) {
@@ -292,3 +294,34 @@ func TestContextLength(t *testing.T) {
 		})
 	}
 }
+
+func TestLogLevel(t *testing.T) {
+	cases := map[string]slog.Level{
+		// Default to INFO
+		"":      slog.LevelInfo,
+		"false": slog.LevelInfo,
+		"f":     slog.LevelInfo,
+		"0":     slog.LevelInfo,
+
+		// True values enable Debug
+		"true": slog.LevelDebug,
+		"t":    slog.LevelDebug,
+
+		// Positive values increase verbosity
+		"1": slog.LevelDebug,
+		"2": logutil.LevelTrace,
+
+		// Negative values decrease verbosity
+		"-1": slog.LevelWarn,
+		"-2": slog.LevelError,
+	}
+
+	for k, v := range cases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_DEBUG", k)
+			if i := LogLevel(); i != v {
+				t.Errorf("%s: expected %d, got %d", k, v, i)
+			}
+		})
+	}
+}
```
llm/server.go

```diff
@@ -17,6 +17,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -30,9 +31,37 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llama"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/model"
 )

+type filteredEnv []string
+
+func (e filteredEnv) LogValue() slog.Value {
+	var attrs []slog.Attr
+	for _, env := range e {
+		if key, value, ok := strings.Cut(env, "="); ok {
+			switch {
+			case strings.HasPrefix(key, "OLLAMA_"),
+				strings.HasPrefix(key, "CUDA_"),
+				strings.HasPrefix(key, "ROCR_"),
+				strings.HasPrefix(key, "ROCM_"),
+				strings.HasPrefix(key, "HIP_"),
+				strings.HasPrefix(key, "GPU_"),
+				strings.HasPrefix(key, "HSA_"),
+				strings.HasPrefix(key, "GGML_"),
+				slices.Contains([]string{"PATH", "LD_LIBRARY_PATH", "DYLD_LIBRARY_PATH"}, key):
+				attrs = append(attrs, slog.String(key, value))
+			}
+		}
+	}
+	return slog.GroupValue(attrs...)
+}
+
 type LlamaServer interface {
 	Ping(ctx context.Context) error
 	WaitUntilRunning(ctx context.Context) error
```
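`filteredEnv` implements the `slog.LogValuer` interface, so the prefix filtering and attribute construction only run if a handler actually records the message; that is why the `envconfig.Debug()` guard that previously wrapped the hand-rolled filtering loop (removed in the next hunk) is no longer needed. A minimal sketch of the mechanism, with hypothetical names:

```go
package main

import (
	"log/slog"
	"os"
)

// expensive is a hypothetical type standing in for filteredEnv.
type expensive []string

// LogValue is only invoked when the record is actually emitted,
// so the work below is skipped entirely at higher log levels.
func (e expensive) LogValue() slog.Value {
	attrs := make([]slog.Attr, 0, len(e))
	for _, v := range e {
		attrs = append(attrs, slog.String("item", v))
	}
	return slog.GroupValue(attrs...)
}

func main() {
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, nil))) // INFO by default
	slog.Debug("dropped", "vals", expensive{"a", "b"})             // LogValue never runs
	slog.Info("emitted", "vals", expensive{"a", "b"})              // LogValue runs here
}
```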
```diff
@@ -148,10 +177,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
 	}

-	if envconfig.Debug() {
-		params = append(params, "--verbose")
-	}
-
 	if opts.MainGPU > 0 {
 		params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
 	}
@@ -404,26 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	}

 	slog.Info("starting llama server", "cmd", s.cmd)
-	if envconfig.Debug() {
-		filteredEnv := []string{}
-		for _, ev := range s.cmd.Env {
-			if strings.HasPrefix(ev, "OLLAMA_") ||
-				strings.HasPrefix(ev, "CUDA_") ||
-				strings.HasPrefix(ev, "ROCR_") ||
-				strings.HasPrefix(ev, "ROCM_") ||
-				strings.HasPrefix(ev, "HIP_") ||
-				strings.HasPrefix(ev, "GPU_") ||
-				strings.HasPrefix(ev, "HSA_") ||
-				strings.HasPrefix(ev, "GGML_") ||
-				strings.HasPrefix(ev, "PATH=") ||
-				strings.HasPrefix(ev, "LD_LIBRARY_PATH=") ||
-				strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") {
-				filteredEnv = append(filteredEnv, ev)
-			}
-		}
-		// Log at debug as the environment is inherited and might contain sensitive information
-		slog.Debug("subprocess", "environment", filteredEnv)
-	}
+	slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))

 	if err = s.cmd.Start(); err != nil {
 		var msg string
@@ -721,6 +727,9 @@ type CompletionResponse struct {
 }

 func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
+	slog.Debug("completion request", "images", len(req.Images), "prompt", len(req.Prompt), "format", string(req.Format))
+	slog.Log(ctx, logutil.LevelTrace, "completion request", "prompt", req.Prompt)
+
 	if len(req.Format) > 0 {
 		switch string(req.Format) {
 		case `null`, `""`:
@@ -884,6 +893,8 @@ type EmbeddingResponse struct {
 }

 func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
+	slog.Log(ctx, logutil.LevelTrace, "embedding request", "input", input)
+
 	if err := s.sem.Acquire(ctx, 1); err != nil {
 		if errors.Is(err, context.Canceled) {
 			slog.Info("aborting embedding request due to client closing the connection")
```
logutil/logutil.go (new file, 0 → 100644)

```go
package logutil

import (
	"io"
	"log/slog"
	"path/filepath"
)

const LevelTrace slog.Level = -8

func NewLogger(w io.Writer, level slog.Level) *slog.Logger {
	return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			switch attr.Key {
			case slog.LevelKey:
				switch attr.Value.Any().(slog.Level) {
				case LevelTrace:
					attr.Value = slog.StringValue("TRACE")
				}
			case slog.SourceKey:
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	}))
}
```
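A minimal usage sketch of the new package (assuming the module path from the diff): construct the logger once, install it as the default, and emit trace records through `slog.Log`. There is no `slog.Trace` helper for custom levels, which is also why the call sites in this commit pass a context, typically `context.TODO()`, explicitly.

```go
package main

import (
	"context"
	"log/slog"
	"os"

	"github.com/ollama/ollama/logutil"
)

func main() {
	slog.SetDefault(logutil.NewLogger(os.Stderr, logutil.LevelTrace))

	slog.Info("shown at INFO and below")
	// ReplaceAttr in NewLogger rewrites the level attribute, so this prints
	// level=TRACE rather than slog's default rendering of level -8 (DEBUG-4).
	slog.Log(context.TODO(), logutil.LevelTrace, "full prompt", "prompt", "...")
}
```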
ml/backend/ggml/ggml.go

```diff
@@ -27,6 +27,7 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs"
 	fsggml "github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	ggml "github.com/ollama/ollama/ml/backend/ggml/ggml/src"
 	"golang.org/x/sync/errgroup"
@@ -222,7 +223,7 @@ func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend,
 		tt := C.ggml_new_tensor(ctxs[bt], t.source.Kind, C.int(len(t.source.Shape)), (*C.int64_t)(unsafe.Pointer(&t.source.Shape[0])))
 		C.ggml_set_name(tt, cname)

-		slog.Debug("created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
+		slog.Log(context.TODO(), logutil.LevelTrace, "created tensor", "name", name, "shape", t.source.Shape, "dtype", t.source.Kind, "buffer_type", C.GoString(C.ggml_backend_buft_name(bt)))
 		//nolint:staticcheck // TODO: check if buffer type supports this tensor
 		return tt
 	}
```
model/model.go

```diff
@@ -19,6 +19,7 @@ import (
 	"github.com/ollama/ollama/fs"
 	fsggml "github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	_ "github.com/ollama/ollama/ml/backend"
 	"github.com/ollama/ollama/model/input"
@@ -202,7 +203,7 @@ func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
 			names := fn(tagsCopy)
 			for _, name := range names {
 				if tensor := base.Backend().Get(strings.Join(name, ".")); tensor != nil {
-					slog.Debug("found tensor", "", tensor)
+					slog.Log(context.TODO(), logutil.LevelTrace, "found tensor", "", tensor)
 					vv.Set(reflect.ValueOf(tensor))
 					break
 				}
```
model/process_text.go

```diff
@@ -2,6 +2,7 @@ package model

 import (
 	"cmp"
+	"context"
 	"iter"
 	"log/slog"
 	"slices"
@@ -10,6 +11,7 @@ import (
 	"github.com/dlclark/regexp2"
 	heap "github.com/emirpasic/gods/v2/trees/binaryheap"
+	"github.com/ollama/ollama/logutil"
 )

 type Special int32
@@ -322,6 +324,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
 		}
 	}

+	slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "ids", ids)
 	return ids, nil
 }
@@ -349,5 +352,6 @@ func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
 		}
 	}

+	slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String())
 	return sb.String(), nil
 }
```
model/process_text_spm.go

```diff
@@ -2,10 +2,13 @@ package model

 import (
 	"container/heap"
+	"context"
 	"fmt"
 	"log/slog"
 	"strconv"
 	"strings"
+
+	"github.com/ollama/ollama/logutil"
 )

 const spmWhitespaceSep = "▁"
@@ -22,7 +25,7 @@ func (spm SentencePieceModel) Vocabulary() *Vocabulary {
 }

 func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
-	slog.Debug("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])
+	slog.Log(context.TODO(), logutil.LevelTrace, "Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])

 	counter := map[int]int{}
 	var maxTokenLen int
@@ -36,7 +39,7 @@ func NewSentencePieceModel(vocab *Vocabulary) SentencePieceModel {
 		}
 	}

-	slog.Debug("Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
+	slog.Log(context.TODO(), logutil.LevelTrace, "Token counts", "normal", counter[TOKEN_TYPE_NORMAL], "unknown", counter[TOKEN_TYPE_UNKNOWN], "control", counter[TOKEN_TYPE_CONTROL],
 		"user defined", counter[TOKEN_TYPE_USER_DEFINED], "unused", counter[TOKEN_TYPE_UNUSED], "byte", counter[TOKEN_TYPE_BYTE],
 		"max token len", maxTokenLen)
@@ -199,6 +202,7 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error)
 		}
 	}

+	slog.Log(context.TODO(), logutil.LevelTrace, "encoded", "ids", ids)
 	return ids, nil
 }
@@ -257,5 +261,6 @@ func (spm SentencePieceModel) Decode(ids []int32) (string, error) {
 		}
 	}

+	slog.Log(context.TODO(), logutil.LevelTrace, "decoded", "string", sb.String())
 	return sb.String(), nil
 }
```
runner/llamarunner/runner.go

```diff
@@ -11,7 +11,6 @@ import (
 	"net"
 	"net/http"
 	"os"
-	"path/filepath"
 	"regexp"
 	"runtime"
 	"strconv"
@@ -23,8 +22,10 @@ import (
 	"golang.org/x/sync/semaphore"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/runner/common"
 )
@@ -680,8 +681,6 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
 	w.Header().Set("Content-Type", "application/json")

-	slog.Debug("embedding request", "content", req.Content)
-
 	seq, err := s.NewSequence(req.Content, nil, NewSequenceParams{embedding: true})
 	if err != nil {
 		http.Error(w, fmt.Sprintf("Failed to create new sequence: %v", err), http.StatusInternalServerError)
@@ -815,7 +814,7 @@ func Execute(args []string) error {
 	kvCacheType := fs.String("kv-cache-type", "", "quantization type for KV cache (default: f16)")
 	port := fs.Int("port", 8080, "Port to expose the server on")
 	threads := fs.Int("threads", runtime.NumCPU(), "Number of threads to use during generation")
-	verbose := fs.Bool("verbose", false, "verbose output (default: disabled)")
+	_ = fs.Bool("verbose", false, "verbose output (default: disabled)")
 	noMmap := fs.Bool("no-mmap", false, "do not memory-map model (slower load but may reduce pageouts if not using mlock)")
 	tensorSplit := fs.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
 	multiUserCache := fs.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
@@ -830,22 +829,7 @@ func Execute(args []string) error {
 	if err := fs.Parse(args); err != nil {
 		return err
 	}
-	level := slog.LevelInfo
-	if *verbose {
-		level = slog.LevelDebug
-	}
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))

 	slog.Info("starting go runner")

 	llama.BackendInit()
```
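Note that both runners keep registering `--verbose` but discard its value (`_ = fs.Bool(...)`), presumably so existing runner invocations still parse; the effective level now comes solely from `envconfig.LogLevel()`, i.e. from the `OLLAMA_DEBUG` variable the runner subprocess inherits from the server's environment.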
runner/ollamarunner/runner.go

```diff
@@ -12,7 +12,6 @@ import (
 	"net"
 	"net/http"
 	"os"
-	"path/filepath"
 	"regexp"
 	"runtime"
 	"strconv"
@@ -24,7 +23,9 @@ import (
 	"golang.org/x/sync/semaphore"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
@@ -816,7 +817,7 @@ func Execute(args []string) error {
 	kvCacheType := fs.String("kv-cache-type", "", "quantization type for KV cache (default: f16)")
 	port := fs.Int("port", 8080, "Port to expose the server on")
 	threads := fs.Int("threads", runtime.NumCPU(), "Number of threads to use during generation")
-	verbose := fs.Bool("verbose", false, "verbose output (default: disabled)")
+	_ = fs.Bool("verbose", false, "verbose output (default: disabled)")
 	_ = fs.Bool("no-mmap", false, "do not memory-map model (slower load but may reduce pageouts if not using mlock)")
 	tensorSplit := fs.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
 	multiUserCache := fs.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
@@ -831,22 +832,7 @@ func Execute(args []string) error {
 	if err := fs.Parse(args); err != nil {
 		return err
 	}
-	level := slog.LevelInfo
-	if *verbose {
-		level = slog.LevelDebug
-	}
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))

 	slog.Info("starting ollama engine")

 	server := &Server{
```
server/routes.go

```diff
@@ -17,7 +17,6 @@ import (
 	"net/netip"
 	"os"
 	"os/signal"
-	"path/filepath"
 	"regexp"
 	"slices"
 	"strings"
@@ -33,6 +32,7 @@ import (
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/model/models/mllama"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/server/internal/client/ollama"
@@ -295,8 +295,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		prompt = b.String()
 	}

-	slog.Debug("generate request", "images", len(images), "prompt", prompt)
-
 	ch := make(chan any)
 	go func() {
 		// TODO (jmorganca): avoid building the response twice both here and below
@@ -1226,26 +1224,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 }

 func Serve(ln net.Listener) error {
-	level := slog.LevelInfo
-	if envconfig.Debug() {
-		level = slog.LevelDebug
-	}
-
-	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
-		Level:     level,
-		AddSource: true,
-		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
-			if attr.Key == slog.SourceKey {
-				source := attr.Value.Any().(*slog.Source)
-				source.File = filepath.Base(source.File)
-			}
-			return attr
-		},
-	})
-
-	slog.SetDefault(slog.New(handler))
+	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))

 	slog.Info("server config", "env", envconfig.Values())

 	blobsDir, err := GetBlobsPath("")
 	if err != nil {
@@ -1521,8 +1501,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		return
 	}

-	slog.Debug("chat request", "images", len(images), "prompt", prompt)
-
 	ch := make(chan any)
 	go func() {
 		defer close(ch)
```
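With this last change, the desktop app (`app/lifecycle`), both runners, and the HTTP server all build their logger through the same `logutil.NewLogger(w, envconfig.LogLevel())` call, so `AddSource`, source-file shortening, and the `TRACE` label are configured in one place instead of four hand-rolled `slog.NewTextHandler` blocks.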