Commit c8900113 (unverified), parent e0ed984c
Authored by Michael Yang on May 21, 2025; committed via GitHub on May 21, 2025

feat: port qwen2 model (#10782)

Showing 3 changed files, with 194 additions and 24 deletions:

    model/models/llama/model.go   +23  -24
    model/models/models.go         +1   -0
    model/models/qwen2/model.go  +170   -0
model/models/llama/model.go

```diff
@@ -75,30 +75,31 @@ type SelfAttention struct {
 	RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`
 }
 
-func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
+func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
 	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
+	ropeDim := cmp.Or(opts.ropeDim, headDim)
 
-	q := sa.Query.Forward(ctx, hiddenState)
-	q = q.Reshape(ctx, headDim, opts.numHeads, batchSize)
-	q = fast.RoPE(ctx, q, positionIDs, opts.ropeDim, opts.ropeBase, opts.ropeScale, rope.WithFactors(sa.RopeFactors))
+	query := sa.Query.Forward(ctx, hiddenState)
+	query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)
 
-	k := sa.Key.Forward(ctx, hiddenState)
-	k = k.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
-	k = fast.RoPE(ctx, k, positionIDs, opts.ropeDim, opts.ropeBase, opts.ropeScale, rope.WithFactors(sa.RopeFactors))
+	key := sa.Key.Forward(ctx, hiddenState)
+	key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
 
-	v := sa.Value.Forward(ctx, hiddenState)
-	v = v.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
+	value := sa.Value.Forward(ctx, hiddenState)
+	value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
 
-	scaleFactor := 1.0 / math.Sqrt(float64(headDim))
-	kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache)
-	kqv = kqv.Reshape(ctx, headDim*opts.numHeads, batchSize)
-	return sa.Output.Forward(ctx, kqv)
+	query = fast.RoPE(ctx, query, positions, ropeDim, opts.ropeBase, opts.ropeScale, rope.WithFactors(sa.RopeFactors))
+	key = fast.RoPE(ctx, key, positions, ropeDim, opts.ropeBase, opts.ropeScale, rope.WithFactors(sa.RopeFactors))
+
+	attention := nn.Attention(ctx, query, key, value, 1.0/math.Sqrt(float64(headDim)), cache)
+	attention = attention.Reshape(ctx, headDim*opts.numHeads, batchSize)
+	return sa.Output.Forward(ctx, attention)
 }
 
 func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return fast.RoPE(ctx, key, shift, m.ropeDim, m.ropeBase, m.ropeScale, rope.WithFactors(m.Layers[layer].SelfAttention.RopeFactors)), nil
+	ropeDim := cmp.Or(m.ropeDim, m.hiddenSize/m.numHeads)
+	return fast.RoPE(ctx, key, shift, ropeDim, m.ropeBase, m.ropeScale, rope.WithFactors(m.Layers[layer].SelfAttention.RopeFactors)), nil
}
 
 type MLP struct {
```
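Both new `cmp.Or` calls use the same fallback idiom: `cmp.Or` (standard library, Go 1.22+) returns its first non-zero argument, so a `headDim` or `ropeDim` left at zero (the key absent from the GGUF metadata) falls back to a computed default. A minimal standalone sketch, with made-up dimensions:

```go
package main

import (
	"cmp"
	"fmt"
)

func main() {
	// cmp.Or returns the first non-zero value, so a missing (zero) metadata
	// field falls back to a computed default. Dimensions are illustrative.
	hiddenSize, numHeads := 4096, 32
	var headDim int // zero: not present in metadata
	headDim = cmp.Or(headDim, hiddenSize/numHeads)
	ropeDim := cmp.Or(0, headDim) // ropeDim unset, defaults to headDim
	fmt.Println(headDim, ropeDim) // 128 128
}
```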
```diff
@@ -119,11 +120,11 @@ type Layer struct {
 	MLP *MLP
 }
 
-func (l *Layer) Forward(ctx ml.Context, hiddenState, positionIDs, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
+func (l *Layer) Forward(ctx ml.Context, hiddenState, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
 	residual := hiddenState
 
 	hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
-	hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positionIDs, cache, opts)
+	hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positions, cache, opts)
 
 	// In the final layer (outputs != nil), optimize by pruning to just the token positions
 	// we need logits for.
```
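That comment is worth unpacking: during generation, logits are usually needed only for the last token in the batch, so pruning the hidden states before the output projection skips a large matmul over the whole sequence. A back-of-the-envelope sketch with illustrative (not model-specific) sizes:

```go
package main

import "fmt"

func main() {
	// Rough cost of the final output projection with and without pruning
	// to the rows we need logits for. All numbers are illustrative.
	seqLen, hiddenSize, vocabSize := 512, 4096, 152064
	outputRows := 1 // typical generation step: logits for the last token only

	full := 2 * seqLen * hiddenSize * vocabSize
	pruned := 2 * outputRows * hiddenSize * vocabSize
	fmt.Printf("full: %.1f GFLOPs, pruned: %.3f GFLOPs (%dx less)\n",
		float64(full)/1e9, float64(pruned)/1e9, full/pruned)
}
```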
```diff
@@ -146,22 +147,20 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 		return nil, err
 	}
 
-	outputs, err := ctx.Input().FromIntSlice(batch.Outputs, len(batch.Outputs))
-	if err != nil {
-		return nil, err
-	}
-
 	hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
 
 	for i, layer := range m.Layers {
 		m.Cache.SetLayer(i)
 
-		var lastLayerOutputs ml.Tensor
+		var outputs ml.Tensor
 		if i == len(m.Layers)-1 {
-			lastLayerOutputs = outputs
+			outputs, err = ctx.Input().FromIntSlice(batch.Outputs, len(batch.Outputs))
+			if err != nil {
+				return nil, err
+			}
 		}
 
-		hiddenState = layer.Forward(ctx, hiddenState, positions, lastLayerOutputs, m.Cache, m.Options)
+		hiddenState = layer.Forward(ctx, hiddenState, positions, outputs, m.Cache, m.Options)
 	}
 
 	hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
```
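One more note on the reworked `Shift` from the first hunk: it re-rotates cached keys in place when the cache slides. This works because rotary embeddings compose additively: rotating a key encoded at position p by a further delta gives the same result as encoding it at p + delta. A toy demonstration on a single channel pair treated as a complex number (a sketch for intuition, not ollama API):

```go
package main

import (
	"fmt"
	"math/cmplx"
)

// RoPE rotates each channel pair by an angle proportional to the position.
// Rotations compose additively, which is why Shift can fix up cached keys
// by applying RoPE with just the position delta.
func rotate(x complex128, pos, theta float64) complex128 {
	return x * cmplx.Exp(complex(0, pos*theta))
}

func main() {
	x := complex(0.3, -1.2) // an arbitrary key channel pair
	theta := 0.01
	direct := rotate(x, 8, theta)                    // encoded directly at position 8
	shifted := rotate(rotate(x, 5, theta), 3, theta) // encoded at 5, shifted by 3
	fmt.Println(direct, shifted)                     // equal up to float error
}
```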
model/models/models.go

```diff
@@ -7,6 +7,7 @@ import (
 	_ "github.com/ollama/ollama/model/models/llama4"
 	_ "github.com/ollama/ollama/model/models/mistral3"
 	_ "github.com/ollama/ollama/model/models/mllama"
+	_ "github.com/ollama/ollama/model/models/qwen2"
 	_ "github.com/ollama/ollama/model/models/qwen25vl"
 	_ "github.com/ollama/ollama/model/models/qwen3"
 )
```
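The blank import is the whole change: it exists only for its side effect, running the qwen2 package's `init`, which registers the architecture (see the `model.Register("qwen2", New)` call at the end of the new file below). A minimal sketch of that registration pattern, with hypothetical names standing in for ollama's model package:

```go
package main

import "fmt"

// registry maps architecture names to constructors; these names are
// hypothetical stand-ins for ollama's internals.
var registry = map[string]func() string{}

func register(name string, ctor func() string) { registry[name] = ctor }

// A model package registers itself at init time, so a blank import of the
// package is enough to make the architecture loadable by name.
func init() { register("qwen2", func() string { return "qwen2 instance" }) }

func main() {
	fmt.Println(registry["qwen2"]())
}
```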
model/models/qwen2/model.go (new file, mode 100644)
```go
package qwen2

import (
	"cmp"
	"math"

	"github.com/ollama/ollama/fs"
	"github.com/ollama/ollama/kvcache"
	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/ml/nn"
	"github.com/ollama/ollama/ml/nn/fast"
	"github.com/ollama/ollama/ml/nn/rope"
	"github.com/ollama/ollama/model"
	"github.com/ollama/ollama/model/input"
)

type Options struct {
	hiddenSize, numHeads, numKVHeads int
	headDim, ropeDim                 int
	eps, ropeBase, ropeScale         float32
}

type Attention struct {
	Query  *nn.Linear `gguf:"attn_q"`
	Key    *nn.Linear `gguf:"attn_k"`
	Value  *nn.Linear `gguf:"attn_v"`
	Output *nn.Linear `gguf:"attn_output"`
}
```
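The `gguf:"..."` struct tags tell the weight loader which GGUF tensor backs each field (e.g. `attn_q` for the query projection of each block). A sketch of how such tags can be read via reflection; the actual loading logic lives in ollama's model package:

```go
package main

import (
	"fmt"
	"reflect"
)

// A stand-in struct mirroring the tag convention above.
type attention struct {
	Query  any `gguf:"attn_q"`
	Output any `gguf:"attn_output"`
}

func main() {
	// Walk the fields and print the tensor name each tag points at.
	t := reflect.TypeOf(attention{})
	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)
		fmt.Printf("%s -> %s\n", f.Name, f.Tag.Get("gguf"))
	}
}
```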
```go
func (attn Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
	batchSize := hiddenStates.Dim(1)
	headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
	ropeDim := cmp.Or(opts.ropeDim, headDim)

	query := attn.Query.Forward(ctx, hiddenStates)
	query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)

	key := attn.Key.Forward(ctx, hiddenStates)
	key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)

	value := attn.Value.Forward(ctx, hiddenStates)
	value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)

	query = fast.RoPE(ctx, query, positions, ropeDim, opts.ropeBase, opts.ropeScale, rope.WithTypeNeoX())
	key = fast.RoPE(ctx, key, positions, ropeDim, opts.ropeBase, opts.ropeScale, rope.WithTypeNeoX())

	attention := nn.Attention(ctx, query, key, value, 1.0/math.Sqrt(float64(headDim)), cache)
	attention = attention.Reshape(ctx, headDim*opts.numHeads, batchSize)
	return attn.Output.Forward(ctx, attention)
}
```
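Two things to note in this Forward. First, `key` and `value` are reshaped to `numKVHeads` rather than `numHeads`, which is grouped-query attention: several query heads share each KV head. Second, the qwen2 port uses `rope.WithTypeNeoX()` (NeoX-style rotary pairing) where the llama code above uses per-layer `rope.WithFactors`. The `nn.Attention` call itself computes standard scaled dot-product attention over the cached keys and values; a naive single-query sketch of that computation, for intuition only:

```go
package main

import (
	"fmt"
	"math"
)

// sdpa computes softmax(q·Kᵀ/√d)·V for one query vector. nn.Attention fuses
// this (plus KV-cache reads, multi-head batching, and causal masking) into
// optimized tensor ops; this is intuition, not the real implementation.
func sdpa(q []float64, k, v [][]float64) []float64 {
	d := math.Sqrt(float64(len(q)))
	weights := make([]float64, len(k))
	var sum float64
	for i := range k {
		var dot float64
		for j := range q {
			dot += q[j] * k[i][j]
		}
		weights[i] = math.Exp(dot / d)
		sum += weights[i]
	}
	out := make([]float64, len(v[0]))
	for i := range v {
		for j := range v[i] {
			out[j] += weights[i] / sum * v[i][j]
		}
	}
	return out
}

func main() {
	q := []float64{1, 0}
	k := [][]float64{{1, 0}, {0, 1}}
	v := [][]float64{{1, 2}, {3, 4}}
	fmt.Println(sdpa(q, k, v)) // weighted toward the first value row
}
```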
```go
type MLP struct {
	Gate *nn.Linear `gguf:"ffn_gate"`
	Up   *nn.Linear `gguf:"ffn_up"`
	Down *nn.Linear `gguf:"ffn_down"`
}

func (mlp MLP) Forward(ctx ml.Context, hiddenStates ml.Tensor) ml.Tensor {
	hiddenStates = mlp.Gate.Forward(ctx, hiddenStates).SILU(ctx).Mul(ctx, mlp.Up.Forward(ctx, hiddenStates))
	return mlp.Down.Forward(ctx, hiddenStates)
}
```
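This is the SwiGLU feed-forward used across the Llama family: `down(silu(gate(x)) * up(x))`, where the gate output modulates the up projection elementwise. A tiny sketch of the activation applied by `.SILU(ctx)`, silu(x) = x·sigmoid(x):

```go
package main

import (
	"fmt"
	"math"
)

// silu is x * sigmoid(x), the activation applied by .SILU(ctx) above.
func silu(x float64) float64 { return x / (1 + math.Exp(-x)) }

func main() {
	for _, x := range []float64{-2, 0, 2} {
		fmt.Printf("silu(%+.0f) = %+.4f\n", x, silu(x))
	}
}
```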
```go
type DecoderLayer struct {
	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
	Attention     *Attention
	MLPNorm       *nn.RMSNorm `gguf:"ffn_norm"`
	MLP           *MLP
}

func (d DecoderLayer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
	residual := hiddenStates
	hiddenStates = d.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
	hiddenStates = d.Attention.Forward(ctx, hiddenStates, positions, cache, opts)

	if outputs != nil {
		hiddenStates = hiddenStates.Rows(ctx, outputs)
		residual = residual.Rows(ctx, outputs)
	}

	hiddenStates = hiddenStates.Add(ctx, residual)

	residual = hiddenStates
	hiddenStates = d.MLPNorm.Forward(ctx, hiddenStates, opts.eps)
	hiddenStates = d.MLP.Forward(ctx, hiddenStates)
	return hiddenStates.Add(ctx, residual)
}
```
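The layer is a standard pre-norm residual block; in equations:

```latex
h' = h + \mathrm{Attention}(\mathrm{RMSNorm}(h)), \qquad
h'' = h' + \mathrm{MLP}(\mathrm{RMSNorm}(h'))
```

with the `outputs != nil` branch shrinking both the residual and the hidden states to the requested rows before the first addition, so the final layer only does work for tokens whose logits are needed.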
```go
type Model struct {
	model.Base
	model.BytePairEncoding

	TokenEmbedding *nn.Embedding  `gguf:"token_embd"`
	Layers         []DecoderLayer `gguf:"blk"`
	OutputNorm     *nn.RMSNorm    `gguf:"output_norm"`
	Output         *nn.Linear     `gguf:"output,alt:token_embd"`

	Options
}

// Forward implements model.Model.
func (m Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
	positions, err := ctx.Input().FromIntSlice(batch.Positions, len(batch.Positions))
	if err != nil {
		return nil, err
	}

	hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)

	for i, layer := range m.Layers {
		m.Cache.SetLayer(i)

		var outputs ml.Tensor
		if i == len(m.Layers)-1 {
			outputs, err = ctx.Input().FromIntSlice(batch.Outputs, len(batch.Outputs))
			if err != nil {
				return nil, err
			}
		}

		hiddenStates = layer.Forward(ctx, hiddenStates, positions, outputs, m.Cache, &m.Options)
	}

	hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)
	hiddenStates = m.Output.Forward(ctx, hiddenStates)
	return hiddenStates, nil
}

func (m Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
	ropeDim := cmp.Or(m.ropeDim, m.hiddenSize/m.numHeads)
	return fast.RoPE(ctx, key, shift, ropeDim, m.ropeBase, m.ropeScale, rope.WithTypeNeoX()), nil
}
```
```go
func New(c fs.Config) (model.Model, error) {
	m := Model{
		Layers: make([]DecoderLayer, c.Uint("block_count")),
		BytePairEncoding: model.NewBytePairEncoding(
			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
			&model.Vocabulary{
				Values: c.Strings("tokenizer.ggml.tokens"),
				Types:  c.Ints("tokenizer.ggml.token_type"),
				Merges: c.Strings("tokenizer.ggml.merges"),
				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
				BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
				EOS: append(
					[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
					c.Ints("tokenizer.ggml.eos_token_ids")...,
				),
			},
		),
		Options: Options{
			hiddenSize: int(c.Uint("embedding_length")),
			numHeads:   int(c.Uint("attention.head_count")),
			numKVHeads: int(c.Uint("attention.head_count_kv")),
			headDim:    int(c.Uint("attention.key_length")),
			ropeDim:    int(c.Uint("rope.dimension_count")),
			ropeBase:   c.Float("rope.freq_base"),
			ropeScale:  c.Float("rope.freq_scale", 1),
			eps:        c.Float("attention.layer_norm_rms_epsilon"),
		},
	}

	m.Cache = kvcache.NewCausalCache(m.Shift)

	return &m, nil
}
```
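Note the defaulted reads, e.g. `c.Float("rope.freq_scale", 1)` and `c.Bool("tokenizer.ggml.add_bos_token", true)`: keys absent from the GGUF metadata fall back to the given default, and the resulting zero values of `headDim`/`ropeDim` are then patched up by the `cmp.Or` fallbacks in Forward and Shift. A map-backed sketch of that accessor pattern (hypothetical, not ollama's `fs.Config`):

```go
package main

import "fmt"

// config is a hypothetical stand-in for a metadata source like fs.Config.
type config map[string]any

// Float returns the stored value, or the provided default when the key is
// missing, mirroring accessors like c.Float("rope.freq_scale", 1).
func (c config) Float(key string, def ...float32) float32 {
	if v, ok := c[key].(float32); ok {
		return v
	}
	if len(def) > 0 {
		return def[0]
	}
	return 0
}

func main() {
	c := config{"rope.freq_base": float32(1e6)}
	fmt.Println(c.Float("rope.freq_base"))     // 1e+06
	fmt.Println(c.Float("rope.freq_scale", 1)) // 1 (default applied)
}
```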
```go
func init() {
	model.Register("qwen2", New)
}
```