OpenDAS / ollama
"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "2c017ca44170fb56141bd15d6929390b7be3830a"
Commit 26c2e0bd, authored Feb 26, 2025 by Michael Yang

ml/backend/ggml: handle user specified cpu offloading

Parent: bf920883
Showing 1 changed file with 15 additions and 10 deletions.

ml/backend/ggml/ggml.go (+15, -10)
ml/backend/ggml/ggml.go @ 26c2e0bd

--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -67,7 +67,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		"num_key_values", len(meta.KV()),
 	)
 
-	type dbt struct {
+	type deviceBufferType struct {
 		d   *C.struct_ggml_backend_device
 		bts []*C.struct_ggml_backend_buffer_type
 	}
@@ -96,7 +96,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 	var sum uint64
 	var cumsum []uint64
 
-	var gpuBufferTypes []dbt
+	var gpuDeviceBufferTypes []deviceBufferType
 	for _, d := range gpus {
 		var free, total C.size_t
 		C.ggml_backend_dev_memory(d, &free, &total)
@@ -104,7 +104,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		cumsum = append(cumsum, sum)
 
 		bt := C.ggml_backend_dev_buffer_type(d)
-		gpuBufferTypes = append(gpuBufferTypes, dbt{
+		gpuDeviceBufferTypes = append(gpuDeviceBufferTypes, deviceBufferType{
 			d:   d,
 			bts: append([]*C.struct_ggml_backend_buffer_type{bt}, cpuBufferTypes...),
 		})
@@ -115,7 +115,8 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		splits[i] = float64(cumsum[i]) / float64(sum)
 	}
 
-	input := dbt{C.ggml_backend_dev_by_type(C.GGML_BACKEND_DEVICE_TYPE_CPU), cpuBufferTypes}
+	cpuDeviceBufferTypes := deviceBufferType{C.ggml_backend_dev_by_type(C.GGML_BACKEND_DEVICE_TYPE_CPU), cpuBufferTypes}
+	input := cpuDeviceBufferTypes
 
 	var blocks int
 	for key, value := range meta.KV() {
@@ -124,18 +125,22 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		}
 	}
 
-	indexFunc := func(i int) func(float64) bool {
-		return func(f float64) bool {
-			return float64(i)/float64(blocks+1) < f
+	assignLayer := func(i int) (temp deviceBufferType) {
+		if i >= params.NumGPULayers {
+			return cpuDeviceBufferTypes
 		}
+
+		return gpuDeviceBufferTypes[slices.IndexFunc(splits, func(f float64) bool {
+			return float64(i)/float64(blocks+1) < f
+		})]
 	}
 
-	layers := make([]dbt, blocks)
+	layers := make([]deviceBufferType, blocks)
 	for i := range layers {
-		layers[i] = gpuBufferTypes[slices.IndexFunc(splits, indexFunc(i))]
+		layers[i] = assignLayer(i)
 	}
 
-	output := gpuBufferTypes[slices.IndexFunc(splits, indexFunc(blocks))]
+	output := assignLayer(blocks)
 
 	maxTensors := len(meta.Tensors().Items())
 	maxTensors += 1
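For context, the following is a minimal, self-contained sketch of the layer-placement logic this commit introduces. It mirrors the new assignLayer closure: layers at or beyond the user-specified params.NumGPULayers fall back to the CPU device, and the remaining layers are spread across GPUs according to the cumulative free-memory splits computed in New. The simplified device type, the sample memory sizes, and the concrete values for blocks and numGPULayers are illustrative assumptions standing in for the cgo-backed deviceBufferType values built in ggml.go.

package main

import (
	"fmt"
	"slices"
)

// device stands in for the cgo-backed deviceBufferType in ggml.go;
// it only carries a name so the sketch stays self-contained.
type device struct{ name string }

func main() {
	// Illustrative inputs: two GPUs with equal free memory and a CPU fallback.
	gpuFree := []uint64{8 << 30, 8 << 30}
	gpuDevices := []device{{"gpu0"}, {"gpu1"}}
	cpuDevice := device{"cpu"}

	blocks := 10      // number of transformer blocks in the model (assumed)
	numGPULayers := 8 // user-specified GPU layer count, as in params.NumGPULayers

	// Cumulative free-memory fractions, matching the splits computed in New.
	var sum uint64
	var cumsum []uint64
	for _, free := range gpuFree {
		sum += free
		cumsum = append(cumsum, sum)
	}
	splits := make([]float64, len(cumsum))
	for i := range splits {
		splits[i] = float64(cumsum[i]) / float64(sum)
	}

	// assignLayer mirrors the closure added in this commit: layers past the
	// user-specified GPU layer count go to the CPU device, the rest land on
	// the first GPU whose cumulative split exceeds the layer's relative
	// position in the model.
	assignLayer := func(i int) device {
		if i >= numGPULayers {
			return cpuDevice
		}
		return gpuDevices[slices.IndexFunc(splits, func(f float64) bool {
			return float64(i)/float64(blocks+1) < f
		})]
	}

	for i := 0; i < blocks; i++ {
		fmt.Printf("layer %2d -> %s\n", i, assignLayer(i).name)
	}
	fmt.Printf("output   -> %s\n", assignLayer(blocks).name)
}

With these sample values, layers 0 through 5 land on gpu0, layers 6 and 7 on gpu1, and layers 8, 9, and the output layer stay on the CPU. Before this change the placement was driven purely by the memory proportions, so there was no way for a user-specified layer count to force layers onto the CPU.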