OpenDAS / ollama

Commit 9e4642e9
Authored Mar 09, 2025 by Michael Yang
Parent 6b0486c2

ollama debug tensor

Builds made with the debug build tag gain a tensor-tracing hook: after ggml_compute_forward runs each graph node on the CPU backend, ollama_debug dumps the node's name, op, type, shape, and values (plus those of its source tensors) to stderr, and the backend is pinned to a single thread so the dumps are not interleaved.
Showing 8 changed files with 186 additions and 1 deletion:

    llama/patches/0020-ollama-debug-tensor.patch    +33   -0
    ml/backend/ggml/ggml.go                          +1   -1
    ml/backend/ggml/ggml/include/ollama-debug.h     +11   -0
    ml/backend/ggml/ggml/src/ggml-cpu/cpu_debug.go   +6   -0
    ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c     +6   -0
    ml/backend/ggml/ggml/src/ollama-debug.c        +115   -0
    ml/backend/ggml/threads.go                       +7   -0
    ml/backend/ggml/threads_debug.go                 +7   -0
llama/patches/0020-ollama-debug-tensor.patch (new file, mode 100644)

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Sun, 9 Mar 2025 14:44:16 -0700
Subject: [PATCH] ollama debug tensor

---
 ggml/src/ggml-cpu/ggml-cpu.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index 2f606d82..ec60e8fc 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -11,6 +11,8 @@
 #include "ggml-threading.h"
 #include "ggml.h"
 
+#include "ollama-debug.h"
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -14103,6 +14105,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
         ggml_compute_forward(&params, node);
 
+#ifdef OLLAMA_DEBUG
+        ollama_debug(node, true);
+#endif
+
         if (state->ith == 0 && cplan->abort_callback &&
             cplan->abort_callback(cplan->abort_callback_data)) {
             atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
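The same hunks appear twice in this commit: once as patch 0020 in the llama/patches series that ollama replays onto its vendored llama.cpp sources, and once applied directly to the vendored copy of ggml-cpu.c further down.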
ml/backend/ggml/ggml.go

@@ -355,7 +355,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		if C.ggml_backend_is_cpu(b) {
 			// set number of threads for cpu backend
-			C.ggml_backend_cpu_set_n_threads(b, C.int(params.NumThreads))
+			C.ggml_backend_cpu_set_n_threads(b, C.int(Threads(params.NumThreads)))
 		}
 	}
ml/backend/ggml/ggml/include/ollama-debug.h (new file, mode 100644)

#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

void ollama_debug(const struct ggml_tensor *tensor, bool verbose);

#ifdef __cplusplus
}
#endif
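For orientation, here is a minimal sketch of how this entry point can be driven directly from C. The context setup, tensor size, and the name "example" are illustrative assumptions against the public ggml API; only ollama_debug itself comes from this commit:

#include "ggml.h"
#include "ollama-debug.h"

int main(void) {
    // allocate a small ggml context to own the tensor data
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context *ctx = ggml_init(ip);

    // a 4x2 f32 tensor filled with a constant, just to have data to dump
    struct ggml_tensor *t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2);
    ggml_set_name(t, "example");
    ggml_set_f32(t, 1.5f);

    // verbose=true prints the values, not just the shape header line
    ollama_debug(t, true);

    ggml_free(ctx);
    return 0;
}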
ml/backend/ggml/ggml/src/ggml-cpu/cpu_debug.go (new file, mode 100644)

//go:build debug

package cpu

// #cgo CPPFLAGS: -DOLLAMA_DEBUG
import "C"
ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c

@@ -11,6 +11,8 @@
 #include "ggml-threading.h"
 #include "ggml.h"
 
+#include "ollama-debug.h"
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -14103,6 +14105,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
         ggml_compute_forward(&params, node);
 
+#ifdef OLLAMA_DEBUG
+        ollama_debug(node, true);
+#endif
+
         if (state->ith == 0 && cplan->abort_callback &&
             cplan->abort_callback(cplan->abort_callback_data)) {
             atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
ml/backend/ggml/ggml/src/ollama-debug.c (new file, mode 100644)

#include <string.h>

#include "ollama-debug.h"

// product of the first ndims entries of dims
static int mul(int64_t *dims, int ndims) {
    int result = 1;
    for (int i = 0; i < ndims; i++) {
        result *= dims[i];
    }

    return result;
}

// write character c to stderr n times
static void repeat(char c, int n) {
    for (int i = 0; i < n; i++) {
        fprintf(stderr, "%c", c);
    }
}

// recursively print a tensor in nested-bracket form; dimensions longer than
// 2*nitems are elided in the middle, and shape/pad drive the indentation of
// continuation lines
static void print_tensor(const void *tensor, void (*cb)(const void *, int),
                         int shape,
                         int64_t *dims, int ndims, int stride,
                         int nitems, int pad) {
    fprintf(stderr, "[");
    for (int i = 0; i < dims[0]; i++) {
        if (i >= nitems && i < dims[0] - nitems) {
            fprintf(stderr, "... (%lld more), ", dims[0] - 2 * nitems);
            int skip = dims[0] - 2 * nitems;
            if (ndims > 1) {
                stride += mul(dims + 1, ndims - 1) * skip;
                repeat('\n', ndims - 1);
                repeat(' ', shape - ndims + 1 + pad);
            }
            i += skip - 1;
        } else if (ndims > 1) {
            print_tensor(tensor, cb, shape, dims + 1, ndims - 1, stride,
                         nitems, pad);
            stride += mul(dims + 1, ndims - 1);
            if (i < dims[0] - 1) {
                fprintf(stderr, ", ");
                repeat('\n', ndims - 1);
                repeat(' ', shape - ndims + 1 + pad);
            }
        } else {
            cb(tensor, stride + i);
            if (i < dims[0] - 1) {
                fprintf(stderr, ", ");
            }
        }
    }
    fprintf(stderr, "]");
}

// per-type element printers; non-negative values get a leading space so
// they line up with negative ones
static void print_tensor_f16(const void *tensor, int i) {
    float value = ggml_fp16_to_fp32(((const ggml_fp16_t *)tensor)[i]);
    fprintf(stderr, "%s%f", value < 0 ? "" : " ", value);
}

static void print_tensor_f32(const void *tensor, int i) {
    float value = ((const float *)tensor)[i];
    fprintf(stderr, "%s%f", value < 0 ? "" : " ", value);
}

static void print_tensor_i32(const void *tensor, int i) {
    int32_t value = ((const int32_t *)tensor)[i];
    fprintf(stderr, "%s%d", value < 0 ? "" : " ", value);
}

// print one tensor: a header line with name, op, type, and shape, then
// (if verbose) the values for the supported types
static void ollama_debug_tensor(const struct ggml_tensor *tensor, bool verbose, const char *prefix, int indent) {
    fprintf(stderr, "%s%s %s (%s): [%lld %lld %lld %lld]\n", prefix, tensor->name,
            ggml_op_name(tensor->op), ggml_type_name(tensor->type), tensor->ne[0],
            tensor->ne[1], tensor->ne[2], tensor->ne[3]);

    if (!verbose) {
        return;
    }

    for (int i = 0; i < indent; i++) {
        fprintf(stderr, " ");
    }

    switch (tensor->type) {
    case GGML_TYPE_F16:
        print_tensor(ggml_get_data(tensor), print_tensor_f16, ggml_n_dims(tensor),
                     (int64_t *)tensor->ne, ggml_n_dims(tensor), 0, 3, indent);
        break;
    case GGML_TYPE_F32:
        print_tensor(ggml_get_data(tensor), print_tensor_f32, ggml_n_dims(tensor),
                     (int64_t *)tensor->ne, ggml_n_dims(tensor), 0, 3, indent);
        break;
    case GGML_TYPE_I32:
        print_tensor(ggml_get_data(tensor), print_tensor_i32, ggml_n_dims(tensor),
                     (int64_t *)tensor->ne, ggml_n_dims(tensor), 0, 3, indent);
        break;
    default:
        fprintf(stderr, "<unsupported type>\n");
        return;
    }

    fprintf(stderr, "\n");
}

// entry point called from ggml_graph_compute_thread: dump the node itself,
// then each of its source tensors
void ollama_debug(const struct ggml_tensor *tensor, bool verbose) {
    ollama_debug_tensor(tensor, verbose, ">>> ", 4);

    for (int i = 0; i < GGML_MAX_SRC && tensor->src[i] != NULL; ++i) {
        char src[8];
        const int n = snprintf(src, sizeof(src), " src%d ", i);
        if (n >= sizeof(src)) {
            src[sizeof(src) - 1] = '\0';
        }

        ollama_debug_tensor(tensor->src[i], verbose, src, 4);
    }
}
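To make the elision logic concrete: ollama_debug_tensor fixes nitems at 3, so any dimension longer than six entries prints its first three and last three values and summarizes the middle. A hypothetical verbose dump of an 8-element f32 vector named "example" (op NONE, values 0..7) would look roughly like this, with exact spacing coming from the repeat/pad logic above:

>>> example NONE (f32): [8 1 1 1]
    [ 0.000000,  1.000000,  2.000000, ... (2 more),  5.000000,  6.000000,  7.000000]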
ml/backend/ggml/threads.go (new file, mode 100644)

//go:build !debug

package ggml

func Threads(n int) int {
	return n
}
ml/backend/ggml/threads_debug.go (new file, mode 100644)

//go:build debug

package ggml

func Threads(_ int) int {
	return 1
}
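These two files form a standard Go build-tag switch: normal builds pass the requested thread count through unchanged, while debug builds ignore it and return 1. Combined with the ggml.go change above, a debug build therefore pins the CPU backend to a single compute thread, so the per-op dumps from ollama_debug arrive in graph order rather than interleaved across workers. Sketched against the stock ggml backend API (the variable name backend is an assumption for illustration):

/* What C.ggml_backend_cpu_set_n_threads(b, C.int(Threads(params.NumThreads)))
 * reduces to in a debug build: one worker thread, deterministic stderr output. */
ggml_backend_cpu_set_n_threads(backend, 1);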