OpenDAS / ollama · Commit b3befef4

[Rocm]Decouple the driving dependencies && [Bugfix]accuracy problem

Authored Apr 01, 2025 by xuxz · Parent 9de895f0
Showing 11 changed files with 30 additions and 30 deletions:

- README.md (+1 −1)
- discover/amd_linux.go (+16 −16)
- llama/ggml-cuda/binbcast.cu (+1 −1)
- llama/ggml-cuda/convert.cu (+1 −1)
- llama/ggml-cuda/cpy.cu (+1 −1)
- llama/ggml-cuda/ggml-cuda.cu (+1 −1)
- llama/ggml-cuda/mmvq.cu (+4 −4)
- llama/ggml-cuda/quantize.cu (+1 −1)
- llama/ggml-cuda/rope.cu (+1 −1)
- llama/ggml-cuda/unary.cu (+1 −1)
- make/Makefile.rocm (+2 −2)
README.md (+1 −1)

````diff
@@ -13,7 +13,7 @@ Ollama can quickly deploy mainstream models.
 ##### Docker
 ```bash
-docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10-fixpy
 docker run -i -t -d --device=/dev/kfd --privileged --network=host --device=/dev/dri --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v <project path (absolute)>:/home -v /opt/hyhal:/opt/hyhal:ro --group-add video --shm-size 16G --name {container name} {image ID}
 ```
````
discover/amd_linux.go (+16 −16)

```diff
@@ -55,11 +55,11 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 	}
 
 	// Opportunistic logging of driver version to aid in troubleshooting
-	driverMajor, driverMinor, err := AMDDriverVersion()
-	if err != nil {
-		// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
-		slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
-	}
+	// driverMajor, driverMinor, err := AMDDriverVersion()
+	// if err != nil {
+	// 	// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
+	// 	slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
+	// }
 
 	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
 	var visibleDevices []string
@@ -284,8 +284,8 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 				Name:          name,
 				Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
 				MinimumMemory: rocmMinimumMemory,
-				DriverMajor:   driverMajor,
-				DriverMinor:   driverMinor,
+				// DriverMajor:   driverMajor,
+				// DriverMinor:   driverMinor,
 			},
 			usedFilepath: usedFile,
 			index:        gpuID,
@@ -415,15 +415,15 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 // Quick check for AMD driver so we can skip amdgpu discovery if not present
 func AMDDetected() bool {
 	// Some driver versions (older?) don't have a version file, so just lookup the parent dir
-	sysfsDir := filepath.Dir(DriverVersionFile)
-	_, err := os.Stat(sysfsDir)
-	if errors.Is(err, os.ErrNotExist) {
-		slog.Debug("amdgpu driver not detected " + sysfsDir)
-		return false
-	} else if err != nil {
-		slog.Debug("error looking up amd driver", "path", sysfsDir, "error", err)
-		return false
-	}
+	// sysfsDir := filepath.Dir(DriverVersionFile)
+	// _, err := os.Stat(sysfsDir)
+	// if errors.Is(err, os.ErrNotExist) {
+	// 	slog.Debug("amdgpu driver not detected " + sysfsDir)
+	// 	return false
+	// } else if err != nil {
+	// 	slog.Debug("error looking up amd driver", "path", sysfsDir, "error", err)
+	// 	return false
+	// }
 	return true
 }
```
llama/ggml-cuda/binbcast.cu (+1 −1)

```diff
@@ -49,7 +49,7 @@ static __device__ __forceinline__ float op_div(const float a, const float b) {
 }
 
 template<float (*bin_op)(const float, const float), typename src0_t, typename src1_t, typename dst_t>
-static __global__ __launch_bounds__(1024) void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst,
+static __global__ void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst,
         int ne0, int ne1, int ne2, int ne3,
         int ne10, int ne11, int ne12, int ne13,
         /*int s0, */ int s1, int s2, int s3,
```
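Apart from mmvq.cu (below), every ggml-cuda change in this commit is the same one-liner: an earlier DCU patch had stamped `__launch_bounds__(1024)` onto these kernels, and this commit removes it again (the identical change recurs in convert.cu, cpy.cu, ggml-cuda.cu, quantize.cu, rope.cu, and unary.cu). For background, a minimal sketch not taken from this repo, with hypothetical kernel names: `__launch_bounds__(N)` promises the compiler the kernel is never launched with more than N threads per block, which in turn forces it to cap registers per thread so an N-thread block can be resident; a blanket N of 1024 can therefore introduce register spills in kernels that never actually run that wide.

```cuda
// Minimal sketch, not from this repo: the same kernel with and without the
// qualifier this commit removes. Kernel names are hypothetical.
#include <cuda_runtime.h>

// Bounded: guaranteed launchable with 1024-thread blocks, but the compiler
// must fit each thread into the register budget that implies (about 64
// registers per thread on GPUs with 64K-register multiprocessors).
static __global__ __launch_bounds__(1024) void scale_bounded(const float * x, float * y, int n) {
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n) {
        y[i] = 2.0f * x[i];
    }
}

// Unbounded: register allocation is left to the compiler's heuristics. A
// 1024-thread launch may then fail with "too many resources requested for
// launch", but typical smaller blocks can run without the forced spills.
static __global__ void scale_unbounded(const float * x, float * y, int n) {
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n) {
        y[i] = 2.0f * x[i];
    }
}
```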
llama/ggml-cuda/convert.cu (+1 −1)

```diff
@@ -596,7 +596,7 @@ static void dequantize_row_iq4_xs_cuda(const void * vx, dst_t * y, const int64_t
 }
 
 template <typename src_t, typename dst_t>
-static __global__ __launch_bounds__(1024) void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t k) {
+static __global__ void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t k) {
     const int64_t i = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;
 
     if (i >= k) {
```
llama/ggml-cuda/cpy.cu (+1 −1)

```diff
@@ -57,7 +57,7 @@ static __device__ void cpy_1_f16_f32(const char * cxi, char * cdsti) {
 }
 
 template <cpy_kernel_t cpy_1>
-static __global__ __launch_bounds__(1024) void cpy_f32_f16(const char * cx, char * cdst, const int ne,
+static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne,
         const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
         const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
         const int nb12, const int nb13) {
```
llama/ggml-cuda/ggml-cuda.cu (+1 −1)

```diff
@@ -1581,7 +1581,7 @@ static void ggml_cuda_op_mul_mat(
     }
 }
 
-static __global__ __launch_bounds__(1024) void k_compute_batched_ptrs(
+static __global__ void k_compute_batched_ptrs(
         const half * src0_as_f16, const half * src1_as_f16, char * dst,
         const void ** ptrs_src, void ** ptrs_dst,
         int64_t ne12, int64_t ne13,
```
llama/ggml-cuda/mmvq.cu (+4 −4)

```diff
@@ -75,8 +75,8 @@
 template <ggml_type type, int ncols_y>
 // #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
-// tell the compiler to use as many registers as it wants, see nwarps definition below
-//__launch_bounds__((ncols_y <= 4 ? 4 : 2)*WARP_SIZE, 1)
+// tell the compiler to use as many registers as it wants, see nwarps definition below
+__launch_bounds__((ncols_y <= 4 ? 4 : 2)*WARP_SIZE, 1)
 // #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 static __global__ void mul_mat_vec_q(
     const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
     const int ncols_x, const int nrows_x, const int nrows_dst) {
@@ -92,7 +92,7 @@
     constexpr int rows_per_cuda_block = 1;
 #else
     constexpr int nwarps              = ncols_y <= 4 ? 4 : 2;
-    constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 1;
+    constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2;
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(RDNA2) && !defined(RDNA3)
 
     const int tid = WARP_SIZE*threadIdx.y + threadIdx.x;
@@ -176,7 +176,7 @@
         case 3:
         case 4:
             nwarps = 4;
-            rows_per_cuda_block = 1;
+            rows_per_cuda_block = 2;
             break;
         case 5:
         case 6:
```
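mmvq.cu is where the accuracy-related changes land: the tuned upstream `__launch_bounds__((ncols_y <= 4 ? 4 : 2)*WARP_SIZE, 1)` is re-enabled, and `rows_per_cuda_block` goes back from 1 to the upstream llama.cpp value of 2 when `ncols_y > 1`. A sketch of how those constants shape the launch, simplified from upstream `mul_mat_vec_q` host code; `WARP_SIZE = 32` and the helper name are assumptions, not code from this repo:

```cuda
#include <cuda_runtime.h>

constexpr int WARP_SIZE = 32;  // assumed here; the real value comes from the ggml headers

// Hypothetical helper: how nwarps and rows_per_cuda_block translate into a
// launch shape. The restored __launch_bounds__(nwarps*WARP_SIZE, 1) names
// exactly the block size computed below, so the compiler budgets registers
// for the block size that is actually launched.
static void mmvq_launch_shape(int ncols_y, int nrows_x, dim3 * block_dims, int * nblocks) {
    const int nwarps              = ncols_y <= 4 ? 4 : 2;
    const int rows_per_cuda_block = ncols_y == 1 ? 1 : 2;  // value restored by this commit
    *block_dims = dim3(WARP_SIZE, nwarps, 1);
    *nblocks    = (nrows_x + rows_per_cuda_block - 1) / rows_per_cuda_block;  // ceil division
}
```

The kernel's row indexing and final reduction assume the same `rows_per_cuda_block` the host uses to size the grid, so the two must agree; the page itself does not say which of the mmvq changes fixed the accuracy problem named in the commit title.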
llama/ggml-cuda/quantize.cu (+1 −1)

```diff
@@ -27,7 +27,7 @@
 #include "quantize.cuh"
 #include <cstdint>
 
-static __global__ __launch_bounds__(1024) void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, const int64_t kx, const int64_t kx0_padded) {
+static __global__ void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, const int64_t kx, const int64_t kx0_padded) {
     const int64_t ix0 = (int64_t)blockDim.x*blockIdx.x + threadIdx.x;
 
     if (ix0 >= kx0_padded) {
```
llama/ggml-cuda/rope.cu (+1 −1)

```diff
@@ -60,7 +60,7 @@ static __device__ void rope_yarn(
 }
 
 template<typename T, bool has_ff>
-static __global__ __launch_bounds__(1024) void rope_norm(
+static __global__ void rope_norm(
         const T * x, T * dst, int ne0, int n_dims, const int32_t * pos, float freq_scale, int p_delta_rows,
         float ext_factor, float attn_factor, rope_corr_dims corr_dims, float theta_scale, const float * freq_factors) {
     const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);
```
llama/ggml-cuda/unary.cu (+1 −1)

```diff
@@ -68,7 +68,7 @@ static __global__ void gelu_quick_f32(const float * x, float * dst, int k) {
     dst[i] = x[i] * (1.0f / (1.0f + expf(GELU_QUICK_COEF * x[i])));
 }
 
-static __global__ __launch_bounds__(1024) void silu_f32(const float * x, float * dst, const int k) {
+static __global__ void silu_f32(const float * x, float * dst, const int k) {
     const int i = blockDim.x*blockIdx.x + threadIdx.x;
 
     if (i >= k) {
```
make/Makefile.rocm (+2 −2)

```diff
@@ -6,8 +6,8 @@
 include make/common-defs.make
 include make/rocm-defs.make
 
-HIP_ARCHS_COMMON := gfx928 gfx906
-HIP_ARCHS_LINUX  := gfx928 gfx906
+HIP_ARCHS_COMMON := gfx928 gfx906 gfx936
+HIP_ARCHS_LINUX  := gfx928 gfx906 gfx936
 
 ifeq ($(OS),windows)
 	GPU_LIB_DIR := $(shell cygpath -m -s "$(HIP_PATH)/bin")
```