Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
8934324b
Commit
8934324b
authored
Mar 07, 2025
by
Michael Yang
Browse files
use fast attention
parent
0e886595
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
14 deletions
+8
-14
ml/backend/ggml/ggml.go
ml/backend/ggml/ggml.go
+2
-2
model/models/gemma3/model.go
model/models/gemma3/model.go
+2
-2
model/models/gemma3/model_vision.go
model/models/gemma3/model_vision.go
+4
-10
No files found.
ml/backend/ggml/ggml.go
View file @
8934324b
...
@@ -958,9 +958,9 @@ func (t *Tensor) Set(ctx ml.Context, t2 ml.Tensor, offset int, strides ...int) m
...
@@ -958,9 +958,9 @@ func (t *Tensor) Set(ctx ml.Context, t2 ml.Tensor, offset int, strides ...int) m
var
tt
*
C
.
struct_ggml_tensor
var
tt
*
C
.
struct_ggml_tensor
switch
len
(
strides
)
{
switch
len
(
strides
)
{
case
0
:
case
0
:
tt
=
C
.
ggml_set_1d
_inplace
(
ctx
.
(
*
Context
)
.
ctx
,
t
.
t
,
t2
.
(
*
Tensor
)
.
t
,
C
.
size_t
(
offset
))
tt
=
C
.
ggml_set_1d
(
ctx
.
(
*
Context
)
.
ctx
,
t
.
t
,
t2
.
(
*
Tensor
)
.
t
,
C
.
size_t
(
offset
))
case
1
:
case
1
:
tt
=
C
.
ggml_set_2d
_inplace
(
ctx
.
(
*
Context
)
.
ctx
,
t
.
t
,
t2
.
(
*
Tensor
)
.
t
,
C
.
size_t
(
offset
),
C
.
size_t
(
strides
[
0
]))
tt
=
C
.
ggml_set_2d
(
ctx
.
(
*
Context
)
.
ctx
,
t
.
t
,
t2
.
(
*
Tensor
)
.
t
,
C
.
size_t
(
offset
),
C
.
size_t
(
strides
[
0
]))
default
:
default
:
panic
(
"unsupported number of dimensions"
)
panic
(
"unsupported number of dimensions"
)
}
}
...
...
model/models/gemma3/model.go
View file @
8934324b
...
@@ -138,8 +138,8 @@ func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Inpu
...
@@ -138,8 +138,8 @@ func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Inpu
{
Token
:
255999
},
// "<start_of_image>""
{
Token
:
255999
},
// "<start_of_image>""
}
}
//
<image_soft_token>
//
pad inputs with placeholders for image embeddings
imageInputs
=
append
(
imageInputs
,
slices
.
Repeat
([]
input
.
Input
{{
Token
:
262144
}},
256
)
...
)
imageInputs
=
append
(
imageInputs
,
slices
.
Repeat
([]
input
.
Input
{{
Token
:
0
}},
256
)
...
)
// <end_of_image>
// <end_of_image>
imageInputs
=
append
(
imageInputs
,
input
.
Input
{
Token
:
256000
})
imageInputs
=
append
(
imageInputs
,
input
.
Input
{
Token
:
256000
})
...
...
model/models/gemma3/model_vision.go
View file @
8934324b
...
@@ -24,17 +24,11 @@ func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenState ml.Tensor, op
...
@@ -24,17 +24,11 @@ func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenState ml.Tensor, op
key
:=
sa
.
Key
.
Forward
(
ctx
,
hiddenState
)
key
:=
sa
.
Key
.
Forward
(
ctx
,
hiddenState
)
value
:=
sa
.
Value
.
Forward
(
ctx
,
hiddenState
)
value
:=
sa
.
Value
.
Forward
(
ctx
,
hiddenState
)
query
=
query
.
Reshape
(
ctx
,
headDim
,
opts
.
numHeads
,
query
.
Dim
(
1
),
batchSize
)
.
Permute
(
ctx
,
0
,
2
,
1
,
3
)
query
=
query
.
Reshape
(
ctx
,
headDim
,
opts
.
numHeads
,
query
.
Dim
(
1
),
batchSize
)
key
=
key
.
Reshape
(
ctx
,
headDim
,
opts
.
numHeads
,
key
.
Dim
(
1
),
batchSize
)
.
Permute
(
ctx
,
0
,
2
,
1
,
3
)
key
=
key
.
Reshape
(
ctx
,
headDim
,
opts
.
numHeads
,
key
.
Dim
(
1
),
batchSize
)
value
=
value
.
Reshape
(
ctx
,
headDim
,
opts
.
numHeads
,
value
.
Dim
(
1
),
batchSize
)
.
Permute
(
ctx
,
1
,
2
,
0
,
3
)
.
Contiguous
(
ctx
)
value
=
value
.
Reshape
(
ctx
,
headDim
,
opts
.
numHeads
,
value
.
Dim
(
1
),
batchSize
)
scores
:=
key
.
Mulmat
(
ctx
,
query
)
attention
:=
nn
.
Attention
(
ctx
,
query
,
key
,
value
,
1.0
/
math
.
Sqrt
(
float64
(
headDim
)),
nil
)
scores
=
scores
.
Scale
(
ctx
,
1.0
/
math
.
Sqrt
(
float64
(
headDim
)))
scores
=
scores
.
Softmax
(
ctx
)
attention
:=
value
.
Mulmat
(
ctx
,
scores
)
attention
=
attention
.
Reshape
(
ctx
,
headDim
,
attention
.
Dim
(
1
),
opts
.
numHeads
,
batchSize
)
attention
=
attention
.
Permute
(
ctx
,
0
,
2
,
1
,
3
)
.
Contiguous
(
ctx
)
attention
=
attention
.
Reshape
(
ctx
,
opts
.
hiddenSize
,
attention
.
Dim
(
2
),
batchSize
)
attention
=
attention
.
Reshape
(
ctx
,
opts
.
hiddenSize
,
attention
.
Dim
(
2
),
batchSize
)
hiddenState
=
sa
.
Output
.
Forward
(
ctx
,
attention
)
hiddenState
=
sa
.
Output
.
Forward
(
ctx
,
attention
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment