Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
8dd12c87
Unverified
Commit
8dd12c87
authored
May 01, 2025
by
Jeffrey Morgan
Committed by
GitHub
May 01, 2025
Browse files
llama: update to commit e1e8e099 (#10513)
parent
e6d2d041
Changes
68
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
500 additions
and
139 deletions
+500
-139
ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh
ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh
+3
-0
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
+58
-39
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh
+9
-6
ml/backend/ggml/ggml/src/ggml-cuda/vecdotq.cuh
ml/backend/ggml/ggml/src/ggml-cuda/vecdotq.cuh
+2
-0
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
+3
-3
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
+366
-40
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
+3
-3
ml/backend/ggml/ggml/src/ggml.c
ml/backend/ggml/ggml/src/ggml.c
+56
-48
No files found.
ml/backend/ggml/ggml/src/ggml-cuda/mmvq.cuh
View file @
8dd12c87
...
@@ -2,6 +2,9 @@
...
@@ -2,6 +2,9 @@
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
void
ggml_cuda_mul_mat_vec_q
(
ggml_backend_cuda_context
&
ctx
,
const
ggml_tensor
*
src0
,
const
ggml_tensor
*
src1
,
const
ggml_tensor
*
ids
,
ggml_tensor
*
dst
);
void
ggml_cuda_op_mul_mat_vec_q
(
void
ggml_cuda_op_mul_mat_vec_q
(
ggml_backend_cuda_context
&
ctx
,
ggml_backend_cuda_context
&
ctx
,
const
ggml_tensor
*
src0
,
const
ggml_tensor
*
src1
,
ggml_tensor
*
dst
,
const
char
*
src0_dd_i
,
const
float
*
src1_ddf_i
,
const
ggml_tensor
*
src0
,
const
ggml_tensor
*
src1
,
ggml_tensor
*
dst
,
const
char
*
src0_dd_i
,
const
float
*
src1_ddf_i
,
...
...
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
View file @
8dd12c87
This diff is collapsed.
Click to expand it.
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cuh
View file @
8dd12c87
...
@@ -12,13 +12,16 @@ static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk
...
@@ -12,13 +12,16 @@ static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk
static_assert
(
MATRIX_ROW_PADDING
%
(
4
*
CUDA_QUANTIZE_BLOCK_SIZE_MMQ
)
==
0
,
"Risk of out-of-bounds access."
);
static_assert
(
MATRIX_ROW_PADDING
%
(
4
*
CUDA_QUANTIZE_BLOCK_SIZE_MMQ
)
==
0
,
"Risk of out-of-bounds access."
);
typedef
void
(
*
quantize_cuda_t
)(
typedef
void
(
*
quantize_cuda_t
)(
const
float
*
x
,
void
*
vy
,
const
int64_t
kx0
,
const
int64_t
kx1
,
const
int64_t
channels
,
const
int64_t
kx0_padded
,
const
float
*
x
,
const
int32_t
*
ids
,
void
*
vy
,
const
ggml_type
type_x
,
cudaStream_t
stream
);
ggml_type
type_src0
,
int64_t
ne00
,
int64_t
s01
,
int64_t
s02
,
int64_t
s03
,
int64_t
ne0
,
int64_t
ne1
,
int64_t
ne2
,
int64_t
ne3
,
cudaStream_t
stream
);
void
quantize_row_q8_1_cuda
(
void
quantize_row_q8_1_cuda
(
const
float
*
x
,
void
*
vy
,
const
int64_t
kx0
,
const
int64_t
kx1
,
const
int64_t
channels
,
const
int64_t
kx0_padded
,
const
float
*
x
,
const
int32_t
*
ids
,
void
*
vy
,
const
ggml_type
type_x
,
cudaStream_t
stream
);
ggml_type
type_src0
,
int64_t
ne00
,
int64_t
s01
,
int64_t
s02
,
int64_t
s03
,
int64_t
ne0
,
int64_t
ne1
,
int64_t
ne2
,
int64_t
ne3
,
cudaStream_t
stream
);
void
quantize_mmq_q8_1_cuda
(
void
quantize_mmq_q8_1_cuda
(
const
float
*
x
,
void
*
vy
,
const
int64_t
kx0
,
const
int64_t
kx1
,
const
int64_t
channels
,
const
int64_t
kx0_padded
,
const
float
*
x
,
const
int32_t
*
ids
,
void
*
vy
,
const
ggml_type
type_x
,
cudaStream_t
stream
);
ggml_type
type_src0
,
int64_t
ne00
,
int64_t
s01
,
int64_t
s02
,
int64_t
s03
,
int64_t
ne0
,
int64_t
ne1
,
int64_t
ne2
,
int64_t
ne3
,
cudaStream_t
stream
);
ml/backend/ggml/ggml/src/ggml-cuda/vecdotq.cuh
View file @
8dd12c87
#pragma once
#include "common.cuh"
#include "common.cuh"
#include <cstdint>
#include <cstdint>
...
...
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
View file @
8dd12c87
This diff is collapsed.
Click to expand it.
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
View file @
8dd12c87
This diff is collapsed.
Click to expand it.
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
View file @
8dd12c87
This diff is collapsed.
Click to expand it.
ml/backend/ggml/ggml/src/ggml.c
View file @
8dd12c87
This diff is collapsed.
Click to expand it.
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment