Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
f67a6df1
Unverified
Commit
f67a6df1
authored
Oct 30, 2025
by
Michael Yang
Committed by
GitHub
Oct 30, 2025
Browse files
interleaved mrope (#12807)
* ml(ggml): mrope * interleave mrope
parent
75e75d9a
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
209 additions
and
119 deletions
+209
-119
llama/patches/0032-interleave-multi-rope.patch
llama/patches/0032-interleave-multi-rope.patch
+113
-0
ml/backend/ggml/ggml.go
ml/backend/ggml/ggml.go
+33
-19
ml/backend/ggml/ggml/src/ggml-cpu/ops.cpp
ml/backend/ggml/ggml/src/ggml-cpu/ops.cpp
+2
-5
ml/backend/ggml/ggml/src/ggml-cuda/rope.cu
ml/backend/ggml/ggml/src/ggml-cuda/rope.cu
+3
-9
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
+3
-7
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
+3
-7
ml/backend/ggml/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
.../ggml/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
+3
-9
ml/nn/rope/rope.go
ml/nn/rope/rope.go
+28
-14
model/models/qwen3vl/model.go
model/models/qwen3vl/model.go
+8
-6
model/models/qwen3vl/model_text.go
model/models/qwen3vl/model_text.go
+13
-43
No files found.
llama/patches/0032-interleave-multi-rope.patch
0 → 100644
View file @
f67a6df1
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <git@mxy.ng>
Date: Thu, 16 Oct 2025 20:37:19 -0700
Subject: [PATCH] interleave multi rope
Since ollama does not use mrope for anything else, redefine it to mean the
interleaved variant used by qwen3vl.
---
ggml/src/ggml-cpu/ops.cpp | 7 ++-----
ggml/src/ggml-cuda/rope.cu | 12 +++---------
ggml/src/ggml-metal/ggml-metal.metal | 10 +++-------
ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp | 12 +++---------
4 files changed, 11 insertions(+), 30 deletions(-)
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index 31478dd8e..4d1ed207e 100644
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -5509,15 +5509,12 @@
static void ggml_mrope_cache_init(
}
float theta = theta_t;
- if (sector >= sections[0] && sector < sec_w) {
+ if (sector % 3 == 1 && sector < 1 + 3 * sections[1]) {
theta = theta_h;
}
- else if (sector >= sec_w && sector < sec_w + sections[2]) {
+ else if (sector % 3 == 2 && sector < 2 + 3 * sections[2]) {
theta = theta_w;
}
- else if (sector >= sec_w + sections[2]) {
- theta = theta_e;
- }
rope_yarn(
theta/ff, freq_scale, corr_dims, i0, ext_factor, mscale, &cache[i0 + 0], &cache[i0 + 1]
diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu
index d058504cd..287fe9d2c 100644
--- a/ggml/src/ggml-cuda/rope.cu
+++ b/ggml/src/ggml-cuda/rope.cu
@@ -151,19 +151,13 @@
static __global__ void rope_multi(
const int sec_w = sections.v[1] + sections.v[0];
const int sector = (i0 / 2) % sect_dims;
- float theta_base = 0.0;
- if (sector < sections.v[0]) {
- theta_base = pos[channel_x]*powf(theta_scale, i0/2.0f);
- }
- else if (sector >= sections.v[0] && sector < sec_w) {
+ float theta_base = pos[channel_x]*powf(theta_scale, i0/2.0f);
+ if (sector % 3 == 1 && sector < 1 + 3 * sections.v[1]) {
theta_base = pos[channel_x + ne2 * 1]*powf(theta_scale, i0/2.0f);
}
- else if (sector >= sec_w && sector < sec_w + sections.v[2]) {
+ else if (sector % 3 == 2 && sector < 2 + 3 * sections.v[2]) {
theta_base = pos[channel_x + ne2 * 2]*powf(theta_scale, i0/2.0f);
}
- else if (sector >= sec_w + sections.v[2]) {
- theta_base = pos[channel_x + ne2 * 3]*powf(theta_scale, i0/2.0f);
- }
const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index 375a0c7fd..9866c96b4 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -3858,15 +3858,11 @@
kernel void kernel_rope_multi(
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sector = ic % sect_dims;
- float theta_base;
- if (sector < args.sect_0) {
- theta_base = (float) pos[i2];
- } else if (sector < sec_w01) {
+ float theta_base = (float) pos[i2];
+ if (sector % 3 == 1 && sector < 1 + 3 * args.sect_1) {
theta_base = (float) pos[i2 + args.ne02];
- } else if (sector < sec_w012) {
+ } else if (sector % 3 == 2 && sector < 2 + 3 * args.sect_2) {
theta_base = (float) pos[i2 + args.ne02 * 2];
- } else {
- theta_base = (float) pos[i2 + args.ne02 * 3];
}
// end of mrope
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
index 111286b49..6fc2b42f8 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
@@ -31,19 +31,13 @@
void main() {
const int sec_w = p.sections[1] + p.sections[0];
const uint sector = (i0 / 2) % sect_dims;
- float theta_base = 0.0;
- if (sector < p.sections[0]) {
- theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
- }
- else if (sector >= p.sections[0] && sector < sec_w) {
+ float theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
+ if (sector % 3 == 1 && sector < 1 + 3 * p.sections[1]) {
theta_base = data_pos[channel_x + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
}
- else if (sector >= sec_w && sector < sec_w + p.sections[2]) {
+ else if (sector % 3 == 2 && sector < 2 + 3 * p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
}
- else if (sector >= sec_w + p.sections[2]) {
- theta_base = data_pos[channel_x + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
- }
const float freq_factor = p.has_ff != 0 ? data_ff[i0/2] : 1.0f;
ml/backend/ggml/ggml.go
View file @
f67a6df1
...
@@ -11,6 +11,7 @@ package ggml
...
@@ -11,6 +11,7 @@ package ggml
import
"C"
import
"C"
import
(
import
(
"cmp"
"context"
"context"
"encoding/binary"
"encoding/binary"
"errors"
"errors"
...
@@ -1490,14 +1491,7 @@ func (t *Tensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
...
@@ -1490,14 +1491,7 @@ func (t *Tensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
func
(
t
*
Tensor
)
RoPE
(
ctx
ml
.
Context
,
positions
ml
.
Tensor
,
ropeDim
int
,
ropeBase
,
ropeScale
float32
,
options
...
func
(
*
rope
.
Options
))
ml
.
Tensor
{
func
(
t
*
Tensor
)
RoPE
(
ctx
ml
.
Context
,
positions
ml
.
Tensor
,
ropeDim
int
,
ropeBase
,
ropeScale
float32
,
options
...
func
(
*
rope
.
Options
))
ml
.
Tensor
{
// Default options
// Default options
opts
:=
rope
.
Options
{
opts
:=
rope
.
Options
{
Factors
:
&
Tensor
{}}
Factors
:
&
Tensor
{},
OriginalContextLength
:
131072
,
ExtrapolationFactor
:
0.
,
AttentionFactor
:
1.
,
BetaFast
:
32.
,
BetaSlow
:
1.
,
}
// Apply any provided options
// Apply any provided options
for
_
,
option
:=
range
options
{
for
_
,
option
:=
range
options
{
...
@@ -1509,24 +1503,44 @@ func (t *Tensor) RoPE(ctx ml.Context, positions ml.Tensor, ropeDim int, ropeBase
...
@@ -1509,24 +1503,44 @@ func (t *Tensor) RoPE(ctx ml.Context, positions ml.Tensor, ropeDim int, ropeBase
dequant
=
C
.
ggml_cast
(
ctx
.
(
*
Context
)
.
ctx
,
t
.
t
,
C
.
GGML_TYPE_F32
)
dequant
=
C
.
ggml_cast
(
ctx
.
(
*
Context
)
.
ctx
,
t
.
t
,
C
.
GGML_TYPE_F32
)
}
}
return
&
Tensor
{
var
tt
*
C
.
struct_ggml_tensor
b
:
t
.
b
,
if
len
(
opts
.
MRoPE
.
Sections
)
>
0
{
t
:
C
.
ggml_rope_ext
(
mropeSections
:=
make
([]
C
.
int32_t
,
4
)
for
i
,
section
:=
range
opts
.
MRoPE
.
Sections
{
mropeSections
[
i
]
=
C
.
int32_t
(
section
)
}
tt
=
C
.
ggml_rope_multi
(
ctx
.
(
*
Context
)
.
ctx
,
ctx
.
(
*
Context
)
.
ctx
,
dequant
,
dequant
,
positions
.
(
*
Tensor
)
.
t
,
positions
.
(
*
Tensor
)
.
t
,
opts
.
Factors
.
(
*
Tensor
)
.
t
,
opts
.
Factors
.
(
*
Tensor
)
.
t
,
C
.
int
(
ropeDim
),
C
.
int
(
ropeDim
),
unsafe
.
SliceData
(
mropeSections
),
C
.
int
(
opts
.
Type
),
C
.
int
(
opts
.
Type
),
C
.
int
(
opts
.
OriginalContextLength
),
cmp
.
Or
(
C
.
int
(
opts
.
YaRN
.
OriginalContextLength
),
128
<<
10
),
C
.
float
(
ropeBase
),
C
.
float
(
ropeBase
),
C
.
float
(
ropeScale
),
C
.
float
(
ropeScale
),
C
.
float
(
opts
.
YaRN
.
ExtrapolationFactor
),
C
.
float
(
opts
.
ExtrapolationFactor
),
cmp
.
Or
(
C
.
float
(
opts
.
YaRN
.
AttentionFactor
),
1
),
C
.
float
(
opts
.
AttentionFactor
),
cmp
.
Or
(
C
.
float
(
opts
.
YaRN
.
BetaFast
),
32
),
C
.
float
(
opts
.
BetaFast
),
cmp
.
Or
(
C
.
float
(
opts
.
YaRN
.
BetaSlow
),
1
),
C
.
float
(
opts
.
BetaSlow
),
)
),
}
else
{
tt
=
C
.
ggml_rope_ext
(
ctx
.
(
*
Context
)
.
ctx
,
dequant
,
positions
.
(
*
Tensor
)
.
t
,
opts
.
Factors
.
(
*
Tensor
)
.
t
,
C
.
int
(
ropeDim
),
C
.
int
(
opts
.
Type
),
cmp
.
Or
(
C
.
int
(
opts
.
YaRN
.
OriginalContextLength
),
128
<<
10
),
C
.
float
(
ropeBase
),
C
.
float
(
ropeScale
),
C
.
float
(
opts
.
YaRN
.
ExtrapolationFactor
),
cmp
.
Or
(
C
.
float
(
opts
.
YaRN
.
AttentionFactor
),
1
),
cmp
.
Or
(
C
.
float
(
opts
.
YaRN
.
BetaFast
),
32
),
cmp
.
Or
(
C
.
float
(
opts
.
YaRN
.
BetaSlow
),
1
),
)
}
}
return
&
Tensor
{
b
:
t
.
b
,
t
:
tt
}
}
}
func
(
t
*
Tensor
)
IM2Col
(
ctx
ml
.
Context
,
t2
ml
.
Tensor
,
s0
,
s1
,
p0
,
p1
,
d0
,
d1
int
)
ml
.
Tensor
{
func
(
t
*
Tensor
)
IM2Col
(
ctx
ml
.
Context
,
t2
ml
.
Tensor
,
s0
,
s1
,
p0
,
p1
,
d0
,
d1
int
)
ml
.
Tensor
{
...
...
ml/backend/ggml/ggml/src/ggml-cpu/ops.cpp
View file @
f67a6df1
...
@@ -5509,15 +5509,12 @@ static void ggml_mrope_cache_init(
...
@@ -5509,15 +5509,12 @@ static void ggml_mrope_cache_init(
}
}
float
theta
=
theta_t
;
float
theta
=
theta_t
;
if
(
sector
>=
sections
[
0
]
&&
sector
<
sec_w
)
{
if
(
sector
%
3
==
1
&&
sector
<
1
+
3
*
sections
[
1
]
)
{
theta
=
theta_h
;
theta
=
theta_h
;
}
}
else
if
(
sector
>=
sec_w
&&
sector
<
sec_w
+
sections
[
2
])
{
else
if
(
sector
%
3
==
2
&&
sector
<
2
+
3
*
sections
[
2
])
{
theta
=
theta_w
;
theta
=
theta_w
;
}
}
else
if
(
sector
>=
sec_w
+
sections
[
2
])
{
theta
=
theta_e
;
}
rope_yarn
(
rope_yarn
(
theta
/
ff
,
freq_scale
,
corr_dims
,
i0
,
ext_factor
,
mscale
,
&
cache
[
i0
+
0
],
&
cache
[
i0
+
1
]
theta
/
ff
,
freq_scale
,
corr_dims
,
i0
,
ext_factor
,
mscale
,
&
cache
[
i0
+
0
],
&
cache
[
i0
+
1
]
...
...
ml/backend/ggml/ggml/src/ggml-cuda/rope.cu
View file @
f67a6df1
...
@@ -151,19 +151,13 @@ static __global__ void rope_multi(
...
@@ -151,19 +151,13 @@ static __global__ void rope_multi(
const
int
sec_w
=
sections
.
v
[
1
]
+
sections
.
v
[
0
];
const
int
sec_w
=
sections
.
v
[
1
]
+
sections
.
v
[
0
];
const
int
sector
=
(
i0
/
2
)
%
sect_dims
;
const
int
sector
=
(
i0
/
2
)
%
sect_dims
;
float
theta_base
=
0.0
;
float
theta_base
=
pos
[
channel_x
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
if
(
sector
<
sections
.
v
[
0
])
{
if
(
sector
%
3
==
1
&&
sector
<
1
+
3
*
sections
.
v
[
1
])
{
theta_base
=
pos
[
channel_x
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
}
else
if
(
sector
>=
sections
.
v
[
0
]
&&
sector
<
sec_w
)
{
theta_base
=
pos
[
channel_x
+
ne2
*
1
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
theta_base
=
pos
[
channel_x
+
ne2
*
1
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
}
}
else
if
(
sector
>=
sec_w
&&
sector
<
sec_w
+
sections
.
v
[
2
])
{
else
if
(
sector
%
3
==
2
&&
sector
<
2
+
3
*
sections
.
v
[
2
])
{
theta_base
=
pos
[
channel_x
+
ne2
*
2
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
theta_base
=
pos
[
channel_x
+
ne2
*
2
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
}
}
else
if
(
sector
>=
sec_w
+
sections
.
v
[
2
])
{
theta_base
=
pos
[
channel_x
+
ne2
*
3
]
*
powf
(
theta_scale
,
i0
/
2.0
f
);
}
const
float
freq_factor
=
has_ff
?
freq_factors
[
i0
/
2
]
:
1.0
f
;
const
float
freq_factor
=
has_ff
?
freq_factors
[
i0
/
2
]
:
1.0
f
;
...
...
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
View file @
f67a6df1
...
@@ -6523,15 +6523,11 @@ kernel void kernel_rope_multi(
...
@@ -6523,15 +6523,11 @@ kernel void kernel_rope_multi(
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sector = ic % sect_dims;
const int sector = ic % sect_dims;
float theta_base;
float theta_base = (float) pos[i2];
if (sector < args.sect_0) {
if (sector % 3 == 1 && sector < 1 + 3 * args.sect_1) {
theta_base = (float) pos[i2];
} else if (sector < sec_w01) {
theta_base = (float) pos[i2 + args.ne02];
theta_base = (float) pos[i2 + args.ne02];
} else if (sector < sec_w012) {
} else if (sector % 3 == 2 && sector < 2 + 3 * args.sect_2) {
theta_base = (float) pos[i2 + args.ne02 * 2];
theta_base = (float) pos[i2 + args.ne02 * 2];
} else {
theta_base = (float) pos[i2 + args.ne02 * 3];
}
}
// end of mrope
// end of mrope
...
...
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
View file @
f67a6df1
...
@@ -3858,15 +3858,11 @@ kernel void kernel_rope_multi(
...
@@ -3858,15 +3858,11 @@ kernel void kernel_rope_multi(
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sector = ic % sect_dims;
const int sector = ic % sect_dims;
float theta_base;
float theta_base = (float) pos[i2];
if (sector < args.sect_0) {
if (sector % 3 == 1 && sector < 1 + 3 * args.sect_1) {
theta_base = (float) pos[i2];
} else if (sector < sec_w01) {
theta_base = (float) pos[i2 + args.ne02];
theta_base = (float) pos[i2 + args.ne02];
} else if (sector < sec_w012) {
} else if (sector % 3 == 2 && sector < 2 + 3 * args.sect_2) {
theta_base = (float) pos[i2 + args.ne02 * 2];
theta_base = (float) pos[i2 + args.ne02 * 2];
} else {
theta_base = (float) pos[i2 + args.ne02 * 3];
}
}
// end of mrope
// end of mrope
...
...
ml/backend/ggml/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
View file @
f67a6df1
...
@@ -31,19 +31,13 @@ void main() {
...
@@ -31,19 +31,13 @@ void main() {
const int sec_w = p.sections[1] + p.sections[0];
const int sec_w = p.sections[1] + p.sections[0];
const uint sector = (i0 / 2) % sect_dims;
const uint sector = (i0 / 2) % sect_dims;
float theta_base = 0.0;
float theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
if (sector < p.sections[0]) {
if (sector % 3 == 1 && sector < 1 + 3 * p.sections[1]) {
theta_base = data_pos[channel_x]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= p.sections[0] && sector < sec_w) {
theta_base = data_pos[channel_x + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
theta_base = data_pos[channel_x + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
}
}
else if (sector >= sec_w && sector < sec_w + p.sections[2]) {
else if (sector % 3 == 2 && sector < 2 + 3 * p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
theta_base = data_pos[channel_x + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
}
}
else if (sector >= sec_w + p.sections[2]) {
theta_base = data_pos[channel_x + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
}
const float freq_factor = p.has_ff != 0 ? data_ff[i0/2] : 1.0f;
const float freq_factor = p.has_ff != 0 ? data_ff[i0/2] : 1.0f;
...
...
ml/nn/rope/rope.go
View file @
f67a6df1
...
@@ -6,19 +6,19 @@ import "github.com/ollama/ollama/ml"
...
@@ -6,19 +6,19 @@ import "github.com/ollama/ollama/ml"
type
Options
struct
{
type
Options
struct
{
Type
int
Type
int
Factors
ml
.
Tensor
Factors
ml
.
Tensor
OriginalContextLength
int
// YaRN options
// YaRN options
YaRN
struct
{
OriginalContextLength
int
ExtrapolationFactor
,
ExtrapolationFactor
,
AttentionFactor
,
AttentionFactor
,
BetaFast
,
BetaFast
,
BetaSlow
float32
BetaSlow
float32
}
}
// WithOriginalContextLength sets a custom context length
// MRoPE options
func
WithOriginalContextLength
(
n
int
)
func
(
*
Options
)
{
MRoPE
struct
{
return
func
(
opts
*
Options
)
{
Sections
[]
int
opts
.
OriginalContextLength
=
n
}
}
}
}
...
@@ -38,14 +38,28 @@ func WithFactors(factors ml.Tensor) func(*Options) {
...
@@ -38,14 +38,28 @@ func WithFactors(factors ml.Tensor) func(*Options) {
}
}
}
}
// WithOriginalContextLength sets a custom context length
func
WithOriginalContextLength
(
n
int
)
func
(
*
Options
)
{
return
func
(
opts
*
Options
)
{
opts
.
YaRN
.
OriginalContextLength
=
n
}
}
func
WithExtrapolationFactor
(
extrapolationFactor
float32
)
func
(
*
Options
)
{
func
WithExtrapolationFactor
(
extrapolationFactor
float32
)
func
(
*
Options
)
{
return
func
(
opts
*
Options
)
{
return
func
(
opts
*
Options
)
{
opts
.
ExtrapolationFactor
=
extrapolationFactor
opts
.
YaRN
.
ExtrapolationFactor
=
extrapolationFactor
}
}
}
}
func
WithAttentionFactor
(
attentionFactor
float32
)
func
(
*
Options
)
{
func
WithAttentionFactor
(
attentionFactor
float32
)
func
(
*
Options
)
{
return
func
(
opts
*
Options
)
{
return
func
(
opts
*
Options
)
{
opts
.
AttentionFactor
=
attentionFactor
opts
.
YaRN
.
AttentionFactor
=
attentionFactor
}
}
func
WithMRoPESections
(
sections
[]
int
)
func
(
*
Options
)
{
return
func
(
opts
*
Options
)
{
opts
.
Type
|=
1
<<
3
opts
.
MRoPE
.
Sections
=
sections
}
}
}
}
model/models/qwen3vl/model.go
View file @
f67a6df1
...
@@ -112,7 +112,8 @@ func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {
...
@@ -112,7 +112,8 @@ func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {
}
}
func
(
m
*
Model
)
Forward
(
ctx
ml
.
Context
,
batch
input
.
Batch
)
(
ml
.
Tensor
,
error
)
{
func
(
m
*
Model
)
Forward
(
ctx
ml
.
Context
,
batch
input
.
Batch
)
(
ml
.
Tensor
,
error
)
{
positionSlice
:=
slices
.
Collect
(
makeSlice2D
[
int32
](
3
,
len
(
batch
.
Positions
)))
// ggml mrope requires 4 positions per token: [time, height, width, extra]
positionSlice
:=
slices
.
Collect
(
makeSlice2D
[
int32
](
4
,
len
(
batch
.
Positions
)))
for
i
,
id
:=
range
batch
.
Positions
{
for
i
,
id
:=
range
batch
.
Positions
{
if
id
<
int32
(
len
(
m
.
positionCache
))
{
if
id
<
int32
(
len
(
m
.
positionCache
))
{
id
=
m
.
positionCache
[
id
]
id
=
m
.
positionCache
[
id
]
...
@@ -123,6 +124,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
...
@@ -123,6 +124,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positionSlice
[
0
][
i
]
=
id
positionSlice
[
0
][
i
]
=
id
positionSlice
[
1
][
i
]
=
id
positionSlice
[
1
][
i
]
=
id
positionSlice
[
2
][
i
]
=
id
positionSlice
[
2
][
i
]
=
id
// positionSlice[3] is intentionally left as zeros
}
}
hiddenStates
:=
m
.
TextModel
.
TokenEmbedding
.
Forward
(
ctx
,
batch
.
Inputs
)
.
Duplicate
(
ctx
)
hiddenStates
:=
m
.
TextModel
.
TokenEmbedding
.
Forward
(
ctx
,
batch
.
Inputs
)
.
Duplicate
(
ctx
)
...
@@ -147,8 +149,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
...
@@ -147,8 +149,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
}
}
}
}
positions
:=
ctx
.
Input
()
.
FromInts
(
slices
.
Concat
(
positionSlice
...
),
len
(
positionSlice
[
0
]),
len
(
positionSlice
))
positions
:=
ctx
.
Input
()
.
FromInts
(
slices
.
Concat
(
positionSlice
...
),
len
(
positionSlice
[
0
])
*
len
(
positionSlice
))
cos
,
sin
:=
m
.
rotaryEmbedding
(
ctx
,
positions
)
for
i
,
layer
:=
range
m
.
TextModel
.
Layers
{
for
i
,
layer
:=
range
m
.
TextModel
.
Layers
{
if
m
.
Cache
!=
nil
{
if
m
.
Cache
!=
nil
{
m
.
Cache
.
SetLayer
(
i
)
m
.
Cache
.
SetLayer
(
i
)
...
@@ -159,7 +160,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
...
@@ -159,7 +160,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
outputs
=
batch
.
Outputs
outputs
=
batch
.
Outputs
}
}
hiddenStates
=
layer
.
Forward
(
ctx
,
hiddenStates
,
cos
,
sin
,
outputs
,
m
.
Cache
,
m
.
Options
)
hiddenStates
=
layer
.
Forward
(
ctx
,
hiddenStates
,
positions
,
outputs
,
m
.
Cache
,
m
.
Options
)
if
i
<
len
(
deepstackVisualEmbeds
)
{
if
i
<
len
(
deepstackVisualEmbeds
)
{
hiddenStates
=
hiddenStates
.
Add
(
ctx
,
deepstackVisualEmbeds
[
i
])
hiddenStates
=
hiddenStates
.
Add
(
ctx
,
deepstackVisualEmbeds
[
i
])
}
}
...
@@ -191,9 +192,10 @@ func New(c fs.Config) (model.Model, error) {
...
@@ -191,9 +192,10 @@ func New(c fs.Config) (model.Model, error) {
ImageProcessor
:
newImageProcessor
(
c
),
ImageProcessor
:
newImageProcessor
(
c
),
}
}
m
.
Cache
=
kvcache
.
NewCausalCache
(
func
(
ctx
ml
.
Context
,
layer
int
,
key
,
position
ml
.
Tensor
)
(
ml
.
Tensor
,
error
)
{
m
.
Cache
=
kvcache
.
NewCausalCache
(
func
(
ctx
ml
.
Context
,
layer
int
,
key
,
positions
ml
.
Tensor
)
(
ml
.
Tensor
,
error
)
{
m
.
positionCache
=
nil
m
.
positionCache
=
nil
return
nil
,
kvcache
.
ErrNotSupported
positions
=
positions
.
Repeat
(
ctx
,
1
,
4
)
.
Reshape
(
ctx
,
-
1
)
return
m
.
Options
.
applyRotaryPositionalEmbedding
(
ctx
,
key
,
positions
),
nil
})
})
return
&
m
,
nil
return
&
m
,
nil
}
}
...
...
model/models/qwen3vl/model_text.go
View file @
f67a6df1
...
@@ -10,6 +10,8 @@ import (
...
@@ -10,6 +10,8 @@ import (
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/fast"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model"
)
)
...
@@ -27,14 +29,18 @@ type TextOptions struct {
...
@@ -27,14 +29,18 @@ type TextOptions struct {
numExperts
,
numExpertsUsed
int
numExperts
,
numExpertsUsed
int
normTopKProb
bool
normTopKProb
bool
inverseFrequenciesCache
[]
float32
}
}
func
(
o
TextOptions
)
headDim
()
int
{
func
(
o
TextOptions
)
headDim
()
int
{
return
cmp
.
Or
(
o
.
keyLength
,
o
.
valueLength
,
o
.
hiddenSize
/
o
.
numHeads
)
return
cmp
.
Or
(
o
.
keyLength
,
o
.
valueLength
,
o
.
hiddenSize
/
o
.
numHeads
)
}
}
func
(
o
TextOptions
)
applyRotaryPositionalEmbedding
(
ctx
ml
.
Context
,
t
,
p
ml
.
Tensor
)
ml
.
Tensor
{
return
fast
.
RoPE
(
ctx
,
t
,
p
,
o
.
headDim
(),
o
.
ropeBase
,
1
/
float32
(
math
.
Sqrt
(
float64
(
o
.
ropeScale
))),
rope
.
WithMRoPESections
(
o
.
mropeSections
),
)
}
type
TextAttention
struct
{
type
TextAttention
struct
{
Query
*
nn
.
Linear
`gguf:"attn_q"`
Query
*
nn
.
Linear
`gguf:"attn_q"`
QueryNorm
*
nn
.
RMSNorm
`gguf:"attn_q_norm"`
QueryNorm
*
nn
.
RMSNorm
`gguf:"attn_q_norm"`
...
@@ -44,7 +50,7 @@ type TextAttention struct {
...
@@ -44,7 +50,7 @@ type TextAttention struct {
Output
*
nn
.
Linear
`gguf:"attn_output"`
Output
*
nn
.
Linear
`gguf:"attn_output"`
}
}
func
(
sa
*
TextAttention
)
Forward
(
ctx
ml
.
Context
,
hiddenStates
,
cos
,
sin
ml
.
Tensor
,
cache
kvcache
.
Cache
,
opts
*
TextOptions
)
ml
.
Tensor
{
func
(
sa
*
TextAttention
)
Forward
(
ctx
ml
.
Context
,
hiddenStates
,
positions
ml
.
Tensor
,
cache
kvcache
.
Cache
,
opts
*
TextOptions
)
ml
.
Tensor
{
batchSize
:=
hiddenStates
.
Dim
(
1
)
batchSize
:=
hiddenStates
.
Dim
(
1
)
query
:=
sa
.
Query
.
Forward
(
ctx
,
hiddenStates
)
query
:=
sa
.
Query
.
Forward
(
ctx
,
hiddenStates
)
...
@@ -58,8 +64,8 @@ func (sa *TextAttention) Forward(ctx ml.Context, hiddenStates, cos, sin ml.Tenso
...
@@ -58,8 +64,8 @@ func (sa *TextAttention) Forward(ctx ml.Context, hiddenStates, cos, sin ml.Tenso
query
=
sa
.
QueryNorm
.
Forward
(
ctx
,
query
,
opts
.
eps
)
query
=
sa
.
QueryNorm
.
Forward
(
ctx
,
query
,
opts
.
eps
)
key
=
sa
.
KeyNorm
.
Forward
(
ctx
,
key
,
opts
.
eps
)
key
=
sa
.
KeyNorm
.
Forward
(
ctx
,
key
,
opts
.
eps
)
query
=
applyRotaryPositionalEmbedding
(
ctx
,
query
,
cos
,
sin
)
query
=
opts
.
applyRotaryPositionalEmbedding
(
ctx
,
query
,
positions
)
key
=
applyRotaryPositionalEmbedding
(
ctx
,
key
,
cos
,
sin
)
key
=
opts
.
applyRotaryPositionalEmbedding
(
ctx
,
key
,
positions
)
attention
:=
nn
.
Attention
(
ctx
,
query
,
key
,
value
,
1.
/
math
.
Sqrt
(
float64
(
opts
.
headDim
())),
cache
)
attention
:=
nn
.
Attention
(
ctx
,
query
,
key
,
value
,
1.
/
math
.
Sqrt
(
float64
(
opts
.
headDim
())),
cache
)
attention
=
attention
.
Reshape
(
ctx
,
attention
.
Dim
(
0
)
*
attention
.
Dim
(
1
),
batchSize
)
attention
=
attention
.
Reshape
(
ctx
,
attention
.
Dim
(
0
)
*
attention
.
Dim
(
1
),
batchSize
)
...
@@ -125,10 +131,10 @@ type TextLayer struct {
...
@@ -125,10 +131,10 @@ type TextLayer struct {
TextMLP
TextMLP
}
}
func
(
d
*
TextLayer
)
Forward
(
ctx
ml
.
Context
,
hiddenStates
,
cos
,
sin
,
outputs
ml
.
Tensor
,
cache
kvcache
.
Cache
,
opts
*
TextOptions
)
ml
.
Tensor
{
func
(
d
*
TextLayer
)
Forward
(
ctx
ml
.
Context
,
hiddenStates
,
positions
,
outputs
ml
.
Tensor
,
cache
kvcache
.
Cache
,
opts
*
TextOptions
)
ml
.
Tensor
{
residual
:=
hiddenStates
residual
:=
hiddenStates
hiddenStates
=
d
.
AttentionNorm
.
Forward
(
ctx
,
hiddenStates
,
opts
.
eps
)
hiddenStates
=
d
.
AttentionNorm
.
Forward
(
ctx
,
hiddenStates
,
opts
.
eps
)
hiddenStates
=
d
.
TextAttention
.
Forward
(
ctx
,
hiddenStates
,
cos
,
sin
,
cache
,
opts
)
hiddenStates
=
d
.
TextAttention
.
Forward
(
ctx
,
hiddenStates
,
positions
,
cache
,
opts
)
if
outputs
!=
nil
{
if
outputs
!=
nil
{
hiddenStates
=
hiddenStates
.
Rows
(
ctx
,
outputs
)
hiddenStates
=
hiddenStates
.
Rows
(
ctx
,
outputs
)
...
@@ -153,42 +159,6 @@ type TextModel struct {
...
@@ -153,42 +159,6 @@ type TextModel struct {
Options
*
TextOptions
Options
*
TextOptions
}
}
func
(
m
*
TextModel
)
rotaryEmbedding
(
ctx
ml
.
Context
,
positions
ml
.
Tensor
)
(
_
,
_
ml
.
Tensor
)
{
positions
=
positions
.
Reshape
(
ctx
,
1
,
positions
.
Dim
(
0
),
positions
.
Dim
(
1
))
if
len
(
m
.
Options
.
inverseFrequenciesCache
)
==
0
{
m
.
Options
.
inverseFrequenciesCache
=
make
([]
float32
,
m
.
Options
.
headDim
()
/
2
)
for
i
:=
range
m
.
Options
.
inverseFrequenciesCache
{
frequency
:=
float32
(
math
.
Pow
(
float64
(
m
.
Options
.
ropeBase
),
float64
(
i
*
2
)
/
float64
(
m
.
Options
.
headDim
())))
m
.
Options
.
inverseFrequenciesCache
[
i
]
=
1
/
frequency
}
}
inverseFrequencies
:=
ctx
.
Input
()
.
FromFloats
(
m
.
Options
.
inverseFrequenciesCache
,
1
,
len
(
m
.
Options
.
inverseFrequenciesCache
))
positions
=
positions
.
Cast
(
ctx
,
ml
.
DTypeF32
)
frequencies
:=
inverseFrequencies
.
Mulmat
(
ctx
,
positions
)
interleaved
:=
frequencies
.
View
(
ctx
,
0
,
frequencies
.
Dim
(
0
),
frequencies
.
Stride
(
1
),
frequencies
.
Dim
(
1
),
)
for
_
,
i
:=
range
[]
int
{
1
,
2
}
{
args
:=
[]
int
{
i
*
frequencies
.
Stride
(
0
),
1
,
3
*
frequencies
.
Stride
(
0
),
m
.
Options
.
mropeSections
[
i
],
frequencies
.
Stride
(
1
),
frequencies
.
Dim
(
1
),
}
ctx
.
Forward
(
frequencies
.
View
(
ctx
,
i
*
frequencies
.
Stride
(
2
)
+
args
[
0
],
args
[
1
:
]
...
)
.
Copy
(
ctx
,
interleaved
.
View
(
ctx
,
args
[
0
],
args
[
1
:
]
...
)))
}
interleaved
=
interleaved
.
Concat
(
ctx
,
interleaved
,
0
)
interleaved
=
interleaved
.
Reshape
(
ctx
,
interleaved
.
Dim
(
0
),
1
,
interleaved
.
Dim
(
1
),
interleaved
.
Dim
(
2
))
return
interleaved
.
Cos
(
ctx
),
interleaved
.
Sin
(
ctx
)
}
var
_
model
.
Model
=
(
*
Model
)(
nil
)
var
_
model
.
Model
=
(
*
Model
)(
nil
)
func
newTextModel
(
c
fs
.
Config
)
*
TextModel
{
func
newTextModel
(
c
fs
.
Config
)
*
TextModel
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment