orangecat / ollama · Commits · 7a81daf0

Commit 7a81daf0 (Unverified)

llama: update vendor code to commit ba1cb19c (#8101)

Authored Dec 14, 2024 by Jeffrey Morgan; committed by GitHub on Dec 14, 2024.
Parent: 60f75560
Changes: 273 files in the full commit; the 20 files below are one page of that diff.

Showing 20 changed files with 249 additions and 24 deletions (+249 / -24)
Files changed on this page:

  llama/ggml-cuda/norm.cuh            +1    -1
  llama/ggml-cuda/opt-step-adamw.cu   +1    -1
  llama/ggml-cuda/opt-step-adamw.cuh  +1    -1
  llama/ggml-cuda/out-prod.cu         +1    -1
  llama/ggml-cuda/out-prod.cuh        +1    -1
  llama/ggml-cuda/pad.cu              +1    -1
  llama/ggml-cuda/pad.cuh             +1    -1
  llama/ggml-cuda/pool2d.cu           +1    -1
  llama/ggml-cuda/pool2d.cuh          +1    -1
  llama/ggml-cuda/quantize.cu         +1    -1
  llama/ggml-cuda/quantize.cuh        +1    -1
  llama/ggml-cuda/rope.cu             +230  -3
  llama/ggml-cuda/rope.cuh            +1    -1
  llama/ggml-cuda/scale.cu            +1    -1
  llama/ggml-cuda/scale.cuh           +1    -1
  llama/ggml-cuda/softmax.cu          +1    -1
  llama/ggml-cuda/softmax.cuh         +1    -1
  llama/ggml-cuda/sum.cu              +1    -3
  llama/ggml-cuda/sum.cuh             +1    -1
  llama/ggml-cuda/sumrows.cu          +1    -1
llama/ggml-cuda/norm.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/opt-step-adamw.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/opt-step-adamw.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/out-prod.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/out-prod.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/pad.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/pad.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/pool2d.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/pool2d.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/quantize.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/quantize.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...
llama/ggml-cuda/rope.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...
@@ -30,6 +30,11 @@ struct rope_corr_dims {
    float v[2];
};

struct mrope_sections {
    int v[4];
};

static __device__ float rope_yarn_ramp(const float low, const float high, const int i0) {
    const float y = (i0 / 2 - low) / max(0.001f, high - low);
    return 1.0f - min(1.0f, max(0.0f, y));
...
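For reference, rope_yarn_ramp (unchanged context above) is the YaRN ramp that the new rope_multi and rope_vision kernels reuse through rope_yarn. A minimal host-side sketch of the same arithmetic; the inputs low = 4, high = 28, i0 = 24 are made up for illustration and do not come from the diff:

#include <algorithm>
#include <cstdio>

// Host-side restatement of rope_yarn_ramp, for illustration only.
static float rope_yarn_ramp_host(const float low, const float high, const int i0) {
    const float y = (i0 / 2 - low) / std::max(0.001f, high - low);
    return 1.0f - std::min(1.0f, std::max(0.0f, y));
}

int main() {
    // i0/2 = 12 lies between low and high, so the ramp is partial:
    // y = (12 - 4) / 24 = 0.333..., result = 0.666...
    std::printf("ramp = %f\n", rope_yarn_ramp_host(4.0f, 28.0f, 24));
    return 0;
}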
@@ -134,6 +139,105 @@ static __global__ void rope_neox(
    dst[i + n_dims/2] = x0*sin_theta + x1*cos_theta;
}

template<typename T, bool has_ff>
static __global__ void rope_multi(
    const T * x, T * dst, int ne0, int ne2, int n_dims, const int32_t * pos, float freq_scale,
    int p_delta_rows, float ext_factor, float attn_factor, rope_corr_dims corr_dims, float theta_scale,
    const float * freq_factors, mrope_sections sections) {
    const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);

    if (i0 >= ne0) {
        return;
    }

    const int row = blockDim.x*blockIdx.x + threadIdx.x;

    if (i0 >= n_dims) {
        const int i = row*ne0 + i0;

        dst[i + 0] = x[i + 0];
        dst[i + 1] = x[i + 1];

        return;
    }

    const int i  = row*ne0 + i0/2;
    const int i2 = row/p_delta_rows;

    int sect_dims = sections.v[0] + sections.v[1] + sections.v[2] + sections.v[3];
    int sec_w = sections.v[1] + sections.v[0];
    int sector = (i0 / 2) % sect_dims;

    float theta_base = 0.0;
    if (sector < sections.v[0]) {
        theta_base = pos[i2]*powf(theta_scale, i0/2.0f);
    }
    else if (sector >= sections.v[0] && sector < sec_w) {
        theta_base = pos[i2 + ne2 * 1]*powf(theta_scale, i0/2.0f);
    }
    else if (sector >= sec_w && sector < sec_w + sections.v[2]) {
        theta_base = pos[i2 + ne2 * 2]*powf(theta_scale, i0/2.0f);
    }
    else if (sector >= sec_w + sections.v[2]) {
        theta_base = pos[i2 + ne2 * 3]*powf(theta_scale, i0/2.0f);
    }

    const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;

    float cos_theta;
    float sin_theta;

    rope_yarn(theta_base/freq_factor, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta);

    const float x0 = x[i + 0];
    const float x1 = x[i + n_dims/2];

    dst[i + 0]        = x0*cos_theta - x1*sin_theta;
    dst[i + n_dims/2] = x0*sin_theta + x1*cos_theta;
}

template<typename T, bool has_ff>
static __global__ void rope_vision(
    const T * x, T * dst, int ne0, int ne2, int n_dims, const int32_t * pos, float freq_scale,
    int p_delta_rows, float ext_factor, float attn_factor, rope_corr_dims corr_dims, float theta_scale,
    const float * freq_factors, mrope_sections sections) {
    const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);

    if (i0 >= ne0) {
        return;
    }

    const int row = blockDim.x*blockIdx.x + threadIdx.x;

    const int i  = row*ne0 + i0/2;
    const int i2 = row/p_delta_rows; // i2-th tokens

    int sect_dims = sections.v[0] + sections.v[1];
    int sec_w = sections.v[1] + sections.v[0];
    int sector = (i0 / 2) % sect_dims;

    float theta_base = 0.0;
    if (sector < sections.v[0]) {
        const int p = sector;
        theta_base = pos[i2]*powf(theta_scale, p);
    }
    else if (sector >= sections.v[0] && sector < sec_w) {
        const int p = sector - sections.v[0];
        theta_base = pos[i2 + ne2]*powf(theta_scale, p);
    }

    const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;

    float cos_theta;
    float sin_theta;

    rope_yarn(theta_base/freq_factor, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta);

    const float x0 = x[i + 0];
    const float x1 = x[i + n_dims];

    dst[i + 0]      = x0*cos_theta - x1*sin_theta;
    dst[i + n_dims] = x0*sin_theta + x1*cos_theta;
}

template<typename T>
static void rope_norm_cuda(
    const T * x, T * dst, int ne0, int n_dims, int nr, const int32_t * pos, float freq_scale, int p_delta_rows,
...
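The added rope_multi kernel splits the rotary dimension pairs into up to four sections (the new mrope_sections struct) and reads the position for each pair from a different slice of pos, offset by multiples of ne2. A small host-side sketch of that sector-to-stream mapping; the section sizes {16, 24, 24, 0} are illustrative and not taken from the diff:

#include <cstdio>

struct mrope_sections_host { int v[4]; };

// Returns which of the four position streams (0..3) the dimension pair i0/2
// reads from, following the same comparisons as rope_multi above.
static int position_stream(int i0, mrope_sections_host sections) {
    const int sect_dims = sections.v[0] + sections.v[1] + sections.v[2] + sections.v[3];
    const int sec_w     = sections.v[1] + sections.v[0];
    const int sector    = (i0 / 2) % sect_dims;

    if (sector < sections.v[0])                            return 0; // pos[i2]
    if (sector >= sections.v[0] && sector < sec_w)         return 1; // pos[i2 + ne2*1]
    if (sector >= sec_w && sector < sec_w + sections.v[2]) return 2; // pos[i2 + ne2*2]
    return 3;                                                        // pos[i2 + ne2*3]
}

int main() {
    const mrope_sections_host sections = {{16, 24, 24, 0}};
    const int pairs[3] = {0, 20, 50}; // pair 0 -> stream 0, 20 -> 1, 50 -> 2
    for (int k = 0; k < 3; ++k) {
        std::printf("pair %d -> stream %d\n", pairs[k], position_stream(2 * pairs[k], sections));
    }
    return 0;
}

rope_vision differs mainly in using only the first two sections and in raising theta_scale to the in-section offset p rather than to i0/2.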
@@ -182,6 +286,56 @@ static void rope_neox_cuda(
    }
}

template<typename T>
static void rope_multi_cuda(
    const T * x, T * dst, int ne0, int ne2, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, mrope_sections sections, cudaStream_t stream) {
    GGML_ASSERT(ne0 % 2 == 0);
    const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
    const int n_blocks_x = (ne0 + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
    const dim3 block_nums(nr, n_blocks_x, 1);

    const float theta_scale = powf(freq_base, -2.0f/n_dims);

    if (freq_factors == nullptr) {
        rope_multi<T, false><<<block_nums, block_dims, 0, stream>>>(
            x, dst, ne0, ne2, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor,
            corr_dims, theta_scale, freq_factors, sections);
    } else {
        rope_multi<T, true><<<block_nums, block_dims, 0, stream>>>(
            x, dst, ne0, ne2, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor,
            corr_dims, theta_scale, freq_factors, sections);
    }
}

template<typename T>
static void rope_vision_cuda(
    const T * x, T * dst, int ne0, int ne2, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, mrope_sections sections, cudaStream_t stream) {
    GGML_ASSERT(ne0 % 2 == 0);
    const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
    const int n_blocks_x = (ne0 + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
    const dim3 block_nums(nr, n_blocks_x, 1);
    // break down (head_dim, heads, seq) into (CUDA_ROPE_BLOCK_SIZE, x, heads * seq)
    // where x ~= ceil(head_dim / CUDA_ROPE_BLOCK_SIZE);

    const float theta_scale = powf(freq_base, -2.0f/n_dims);

    if (freq_factors == nullptr) {
        rope_vision<T, false><<<block_nums, block_dims, 0, stream>>>(
            x, dst, ne0, ne2, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor,
            corr_dims, theta_scale, freq_factors, sections);
    } else {
        rope_vision<T, true><<<block_nums, block_dims, 0, stream>>>(
            x, dst, ne0, ne2, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor,
            corr_dims, theta_scale, freq_factors, sections);
    }
}

static void rope_norm_cuda_f16(
    const half * x, half * dst, int ne0, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, cudaStream_t stream) {
...
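Both new launchers keep the launch geometry of the existing rope kernels: each thread rotates one pair of elements, so the y dimension needs ceil(ne0 / (2*CUDA_ROPE_BLOCK_SIZE)) blocks while the x dimension covers the rows. A small sketch of that arithmetic, assuming CUDA_ROPE_BLOCK_SIZE is 256 (its value in the vendored rope.cuh) and made-up tensor sizes:

#include <cstdio>

int main() {
    const int CUDA_ROPE_BLOCK_SIZE = 256; // assumed value from rope.cuh
    const int ne0 = 128;                  // example head dimension (elements per row)
    const int nr  = 32 * 4096;            // example row count = heads * sequence length

    // A block covers 2*CUDA_ROPE_BLOCK_SIZE elements of a row (one pair per thread),
    // so n_blocks_x is the ceiling division over that span: here (128 + 511) / 512 = 1.
    const int n_blocks_x = (ne0 + 2 * CUDA_ROPE_BLOCK_SIZE - 1) / (2 * CUDA_ROPE_BLOCK_SIZE);

    // block_nums = (nr, n_blocks_x, 1); block_dims = (1, CUDA_ROPE_BLOCK_SIZE, 1)
    std::printf("grid = (%d, %d, 1), block = (1, %d, 1)\n", nr, n_blocks_x, CUDA_ROPE_BLOCK_SIZE);
    return 0;
}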
@@ -211,6 +365,38 @@ static void rope_neox_cuda_f32(
    rope_neox_cuda<float>(x, dst, ne0, n_dims, nr, pos, freq_scale, p_delta_rows, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
}

static void rope_multi_cuda_f16(
    const half * x, half * dst, int ne0, int ne2, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, mrope_sections sections, cudaStream_t stream) {

    rope_multi_cuda<half>(x, dst, ne0, ne2, n_dims, nr, pos, freq_scale, p_delta_rows, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
}

static void rope_multi_cuda_f32(
    const float * x, float * dst, int ne0, int ne2, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, mrope_sections sections, cudaStream_t stream) {

    rope_multi_cuda<float>(x, dst, ne0, ne2, n_dims, nr, pos, freq_scale, p_delta_rows, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
}

static void rope_vision_cuda_f16(
    const half * x, half * dst, int ne0, int ne2, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, mrope_sections sections, cudaStream_t stream) {

    rope_vision_cuda<half>(x, dst, ne0, ne2, n_dims, nr, pos, freq_scale, p_delta_rows, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
}

static void rope_vision_cuda_f32(
    const float * x, float * dst, int ne0, int ne2, int n_dims, int nr, const int32_t * pos, float freq_scale,
    int p_delta_rows, float freq_base, float ext_factor, float attn_factor, rope_corr_dims corr_dims,
    const float * freq_factors, mrope_sections sections, cudaStream_t stream) {

    rope_vision_cuda<float>(x, dst, ne0, ne2, n_dims, nr, pos, freq_scale, p_delta_rows, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
}

void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];
    const ggml_tensor * src1 = dst->src[1];
...
@@ -227,8 +413,9 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
    GGML_ASSERT(src0->type == dst->type);

-   const int64_t ne00 = src0->ne[0];
-   const int64_t ne01 = src0->ne[1];
+   const int64_t ne00 = src0->ne[0]; // head dims
+   const int64_t ne01 = src0->ne[1]; // num heads
+   const int64_t ne02 = src0->ne[2]; // num heads
    const int64_t nr = ggml_nrows(src0);

    //const int n_past = ((int32_t *) dst->op_params)[0];
...
@@ -236,6 +423,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const int mode = ((int32_t *) dst->op_params)[2];
    //const int n_ctx = ((int32_t *) dst->op_params)[3];
    const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
    mrope_sections sections;

    // RoPE alteration for extended context
    float freq_base;
...
@@ -251,8 +439,19 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    memcpy(&attn_factor, (int32_t *) dst->op_params +  8, sizeof(float));
    memcpy(&beta_fast,   (int32_t *) dst->op_params +  9, sizeof(float));
    memcpy(&beta_slow,   (int32_t *) dst->op_params + 10, sizeof(float));
    memcpy(&sections.v,  (int32_t *) dst->op_params + 11, sizeof(int)*4);

    const bool is_neox   = mode & GGML_ROPE_TYPE_NEOX;
    const bool is_mrope  = mode & GGML_ROPE_TYPE_MROPE;
    const bool is_vision = mode == GGML_ROPE_TYPE_VISION;

    if (is_mrope) {
        GGML_ASSERT(sections.v[0] > 0 || sections.v[1] > 0 || sections.v[2] > 0);
    }

    if (is_vision) {
        GGML_ASSERT(n_dims == ne00/2);
    }

    const int32_t * pos = (const int32_t *) src1_d;
...
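The new parameters ride in ggml's existing op_params block: the four section sizes are stored as int32 slots 11-14, right after the float parameters at slots 8-10 that were already read here. A standalone sketch of that packing and unpacking, mirroring the memcpy calls above; the parameter values themselves are invented, only the offsets come from the diff:

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
    int32_t op_params[16] = {0};

    // Write side (illustrative): floats are bit-copied into int32 slots,
    // and the four section sizes occupy slots 11..14.
    const float attn_factor = 1.0f, beta_fast = 32.0f, beta_slow = 1.0f;
    const int   sections_in[4] = {16, 24, 24, 0};
    std::memcpy(op_params +  8, &attn_factor, sizeof(float));
    std::memcpy(op_params +  9, &beta_fast,   sizeof(float));
    std::memcpy(op_params + 10, &beta_slow,   sizeof(float));
    std::memcpy(op_params + 11, sections_in,  sizeof(int) * 4);

    // Read side, as in ggml_cuda_op_rope above.
    struct { int v[4]; } sections;
    float attn_factor_out;
    std::memcpy(&attn_factor_out, op_params +  8, sizeof(float));
    std::memcpy(&sections.v,      op_params + 11, sizeof(int) * 4);

    std::printf("attn_factor=%g sections=%d,%d,%d,%d\n",
                attn_factor_out, sections.v[0], sections.v[1], sections.v[2], sections.v[3]);
    return 0;
}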
@@ -279,6 +478,34 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
        } else {
            GGML_ABORT("fatal error");
        }
    } else if (is_mrope && !is_vision) {
        if (src0->type == GGML_TYPE_F32) {
            rope_multi_cuda_f32(
                (const float *) src0_d, (float *) dst_d, ne00, ne02, n_dims, nr, pos, freq_scale,
                ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
        } else if (src0->type == GGML_TYPE_F16) {
            rope_multi_cuda_f16(
                (const half *) src0_d, (half *) dst_d, ne00, ne02, n_dims, nr, pos, freq_scale,
                ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
        } else {
            GGML_ABORT("fatal error");
        }
    } else if (is_vision) {
        if (src0->type == GGML_TYPE_F32) {
            rope_vision_cuda_f32(
                (const float *) src0_d, (float *) dst_d, ne00, ne02, n_dims, nr, pos, freq_scale,
                ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
        } else if (src0->type == GGML_TYPE_F16) {
            rope_vision_cuda_f16(
                (const half *) src0_d, (half *) dst_d, ne00, ne02, n_dims, nr, pos, freq_scale,
                ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, sections, stream);
        } else {
            GGML_ABORT("fatal error");
        }
    } else {
        if (src0->type == GGML_TYPE_F32) {
            rope_norm_cuda_f32(
...
llama/ggml-cuda/rope.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/scale.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/scale.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/softmax.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/softmax.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...
llama/ggml-cuda/sum.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

@@ -29,8 +29,6 @@
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700

#ifdef USE_CUB
-// On Windows CUB uses libraries with variables called CC_PASCAL which conflict with the define in common.cuh.
-// For this reason CUB must be included BEFORE anything else.
#include <cub/cub.cuh>
using namespace cub;
#endif // USE_CUB
...
llama/ggml-cuda/sum.cuh  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...

llama/ggml-cuda/sumrows.cu  (view file @ 7a81daf0)

/**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
...