Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
b452da57
You need to sign in or sign up before continuing.
Commit
b452da57
authored
Apr 02, 2025
by
xuxzh1
🎱
Browse files
[update] common.cuh && quantize.cu
parent
ac4166cb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
4 deletions
+6
-4
README.md
README.md
+0
-1
ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
+1
-1
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
+5
-2
No files found.
README.md
View file @
b452da57
...
@@ -44,7 +44,6 @@ go env -w GOPROXY=https://goproxy.cn,direct
...
@@ -44,7 +44,6 @@ go env -w GOPROXY=https://goproxy.cn,direct
export
LIBRARY_PATH
=
/opt/dtk/lib:
$LIBRARY_PATH
export
LIBRARY_PATH
=
/opt/dtk/lib:
$LIBRARY_PATH
cmake
-B
build
cmake
-B
build
cmake
--build
build
cmake
--build
build
go build .
```
```
## 运行
## 运行
...
...
ml/backend/ggml/ggml/src/ggml-cuda/common.cuh
View file @
b452da57
...
@@ -384,7 +384,7 @@ static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half
...
@@ -384,7 +384,7 @@ static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half
static
__device__
__forceinline__
int
ggml_cuda_dp4a
(
const
int
a
,
const
int
b
,
int
c
)
{
static
__device__
__forceinline__
int
ggml_cuda_dp4a
(
const
int
a
,
const
int
b
,
int
c
)
{
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(RDNA2)
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(RDNA2)
|| true
c
=
__builtin_amdgcn_sdot4
(
a
,
b
,
c
,
false
);
c
=
__builtin_amdgcn_sdot4
(
a
,
b
,
c
,
false
);
#elif defined(RDNA3)
#elif defined(RDNA3)
c
=
__builtin_amdgcn_sudot4
(
true
,
a
,
true
,
b
,
c
,
false
);
c
=
__builtin_amdgcn_sudot4
(
true
,
a
,
true
,
b
,
c
,
false
);
...
...
ml/backend/ggml/ggml/src/ggml-cuda/quantize.cu
View file @
b452da57
...
@@ -33,8 +33,11 @@ static __global__ void quantize_q8_1(const float * __restrict__ x, void * __rest
...
@@ -33,8 +33,11 @@ static __global__ void quantize_q8_1(const float * __restrict__ x, void * __rest
return
;
return
;
}
}
reinterpret_cast
<
half
&>
(
y
[
ib
].
ds
.
x
)
=
d
;
ggml_half2
ds
=
{
d
,
sum
};
reinterpret_cast
<
half
&>
(
y
[
ib
].
ds
.
y
)
=
sum
;
y
[
ib
].
ds
=
ds
;
// reinterpret_cast<half&>(y[ib].ds.x) = d;
// reinterpret_cast<half&>(y[ib].ds.y) = sum;
}
}
template
<
mmq_q8_1_ds_layout
ds_layout
>
template
<
mmq_q8_1_ds_layout
ds_layout
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment