Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
84e5b6ff
Commit
84e5b6ff
authored
Feb 04, 2025
by
xuxzh1
🎱
Browse files
Adapt to Ollama version 0.5.7, which supports DeepSeek-R1 inference
parent
a420a453
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
56 additions
and
523 deletions
+56
-523
README.md
README.md
+42
-510
discover/amd_linux.go
discover/amd_linux.go
+3
-3
llama/ggml-cuda/mmvq.cu
llama/ggml-cuda/mmvq.cu
+1
-1
llama/ggml-cuda/norm.cu
llama/ggml-cuda/norm.cu
+3
-3
make/Makefile.rocm
make/Makefile.rocm
+5
-5
make/gpu.make
make/gpu.make
+2
-1
No files found.
README.md
View file @
84e5b6ff
This diff is collapsed.
Click to expand it.
discover/amd_linux.go
View file @
84e5b6ff
...
@@ -22,7 +22,7 @@ import (
...
@@ -22,7 +22,7 @@ import (
// Discovery logic for AMD/ROCm GPUs
// Discovery logic for AMD/ROCm GPUs
const
(
const
(
DriverVersionFile
=
"/sys/module/
amdgp
u/version"
DriverVersionFile
=
"/sys/module/
hydc
u/version"
AMDNodesSysfsDir
=
"/sys/class/kfd/kfd/topology/nodes/"
AMDNodesSysfsDir
=
"/sys/class/kfd/kfd/topology/nodes/"
GPUPropertiesFileGlob
=
AMDNodesSysfsDir
+
"*/properties"
GPUPropertiesFileGlob
=
AMDNodesSysfsDir
+
"*/properties"
...
@@ -42,8 +42,8 @@ const (
...
@@ -42,8 +42,8 @@ const (
var
(
var
(
// Used to validate if the given ROCm lib is usable
// Used to validate if the given ROCm lib is usable
ROCmLibGlobs
=
[]
string
{
"libhipblas.so.
2*
"
,
"rocblas"
}
// TODO - probably include more coverage of files here...
ROCmLibGlobs
=
[]
string
{
"libhipblas.so.
0.1
"
,
"rocblas"
}
// TODO - probably include more coverage of files here...
RocmStandardLocations
=
[]
string
{
"/opt/
rocm
/lib"
,
"/usr/lib64"
}
RocmStandardLocations
=
[]
string
{
"/opt/
dtk
/lib"
,
"/usr/lib64"
}
)
)
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
...
...
llama/ggml-cuda/mmvq.cu
View file @
84e5b6ff
...
@@ -168,7 +168,7 @@ static void mul_mat_vec_q_cuda(
...
@@ -168,7 +168,7 @@ static void mul_mat_vec_q_cuda(
int64_t
nwarps
=
1
;
int64_t
nwarps
=
1
;
int64_t
rows_per_cuda_block
=
1
;
int64_t
rows_per_cuda_block
=
1
;
if
(
ggml_cuda_info
().
devices
[
id
].
cc
<
GGML_CUDA_CC_CDNA
||
ggml_cuda_info
().
devices
[
id
].
cc
==
GGML_CUDA_CC_RDNA1
)
{
// NVIDIA and AMD older than RDNA2 but not CDNA
if
(
ggml_cuda_info
().
devices
[
id
].
cc
<
1001030
)
{
// NVIDIA and AMD older than RDNA2 but not CDNA
switch
(
ncols_y
)
{
switch
(
ncols_y
)
{
case
1
:
case
1
:
nwarps
=
4
;
nwarps
=
4
;
...
...
llama/ggml-cuda/norm.cu
View file @
84e5b6ff
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
#include "norm.cuh"
#include "norm.cuh"
template
<
int
block_size
>
template
<
int
block_size
>
static
__global__
void
norm_f32
(
const
float
*
x
,
float
*
dst
,
const
int
ncols
,
const
float
eps
)
{
static
__global__
void
__launch_bounds__
(
1024
)
norm_f32
(
const
float
*
x
,
float
*
dst
,
const
int
ncols
,
const
float
eps
)
{
const
int
row
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
const
int
row
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
const
int
tid
=
threadIdx
.
x
;
const
int
tid
=
threadIdx
.
x
;
...
@@ -63,7 +63,7 @@ static __global__ void norm_f32(const float * x, float * dst, const int ncols, c
...
@@ -63,7 +63,7 @@ static __global__ void norm_f32(const float * x, float * dst, const int ncols, c
}
}
template
<
int
block_size
>
template
<
int
block_size
>
static
__global__
void
group_norm_f32
(
const
float
*
x
,
float
*
dst
,
const
int
group_size
,
const
int
ne_elements
,
const
float
eps
)
{
static
__global__
void
__launch_bounds__
(
1024
)
group_norm_f32
(
const
float
*
x
,
float
*
dst
,
const
int
group_size
,
const
int
ne_elements
,
const
float
eps
)
{
// blockIdx.x: num_groups idx
// blockIdx.x: num_groups idx
// threadIdx.x: block_size idx
// threadIdx.x: block_size idx
int
start
=
blockIdx
.
x
*
group_size
;
int
start
=
blockIdx
.
x
*
group_size
;
...
@@ -124,7 +124,7 @@ static __global__ void group_norm_f32(const float * x, float * dst, const int gr
...
@@ -124,7 +124,7 @@ static __global__ void group_norm_f32(const float * x, float * dst, const int gr
}
}
template
<
int
block_size
>
template
<
int
block_size
>
static
__global__
void
rms_norm_f32
(
const
float
*
x
,
float
*
dst
,
const
int
ncols
,
const
float
eps
)
{
static
__global__
void
__launch_bounds__
(
1024
)
rms_norm_f32
(
const
float
*
x
,
float
*
dst
,
const
int
ncols
,
const
float
eps
)
{
const
int
row
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
const
int
row
=
blockIdx
.
x
*
blockDim
.
y
+
threadIdx
.
y
;
const
int
tid
=
threadIdx
.
x
;
const
int
tid
=
threadIdx
.
x
;
...
...
make/Makefile.rocm
View file @
84e5b6ff
...
@@ -6,8 +6,8 @@
...
@@ -6,8 +6,8 @@
include
make/common-defs.make
include
make/common-defs.make
include
make/rocm-defs.make
include
make/rocm-defs.make
HIP_ARCHS_COMMON
:=
gfx9
00 gfx940 gfx941 gfx942 gfx1010 gfx1012 gfx1030 gfx1100 gfx1101 gfx1102
HIP_ARCHS_COMMON
:=
gfx9
28 gfx906
HIP_ARCHS_LINUX
:=
gfx9
06:xnack- gfx908:xnack- gfx90a:xnack+ gfx90a:xnack-
HIP_ARCHS_LINUX
:=
gfx9
28 gfx906
ifeq
($(OS),windows)
ifeq
($(OS),windows)
GPU_LIB_DIR
:=
$(
shell
cygpath
-m
-s
"
$(HIP_PATH)
/bin"
)
GPU_LIB_DIR
:=
$(
shell
cygpath
-m
-s
"
$(HIP_PATH)
/bin"
)
...
@@ -88,8 +88,8 @@ GPU_COMPILER_CUFLAGS = \
...
@@ -88,8 +88,8 @@ GPU_COMPILER_CUFLAGS = \
-DUSE_PROF_API
=
1
\
-DUSE_PROF_API
=
1
\
-std
=
gnu++17
\
-std
=
gnu++17
\
-x
hip
\
-x
hip
\
-mllvm
=
-amdgpu-early-inline-all
=
true
\
#
-mllvm=-amdgpu-early-inline-all=true
\
-mllvm
=
-amdgpu-function-calls
=
false
\
#
-mllvm=-amdgpu-function-calls=false
\
-Wno-expansion-to-defined
\
-Wno-expansion-to-defined
\
-Wno-invalid-noreturn
\
-Wno-invalid-noreturn
\
-Wno-ignored-attributes
\
-Wno-ignored-attributes
\
...
@@ -111,7 +111,7 @@ $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_s
...
@@ -111,7 +111,7 @@ $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_s
$(ROCBLAS_DIST_DEP_MANIFEST)
:
$(ROCBLAS_DIST_DEP_MANIFEST)
:
@
-mkdir
-p
$(
dir
$@
)
@
-mkdir
-p
$(
dir
$@
)
@
echo
"Copying rocblas library..."
@
echo
"Copying rocblas library..."
(
cd
$(
GPU_LIB_DIR)
/rocblas/lib
rary
/
&&
tar
cf -
.
)
|
(
cd
$(
dir
$@
)
&&
tar
xf -
)
cd
$(
HIP_PATH)
/..
/rocblas/lib/
&&
tar
cf -
.
|
(
cd
$(
dir
$@
)
&&
tar
xf -
)
@
echo
"rocblas library copy complete"
@
echo
"rocblas library copy complete"
$(GPU_DIST_TRANSITIVE_LIB_DEPS)
:
$(GPU_DIST_TRANSITIVE_LIB_DEPS)
:
...
...
make/gpu.make
View file @
84e5b6ff
# Generalized GPU runner build
# Generalized GPU runner build
INCLUDES
=
-I
$(CURDIR)
/llama
GPU_COMPILER_CFLAGS
+=
$(INCLUDES)
ifndef
GPU_RUNNER_NAME
ifndef
GPU_RUNNER_NAME
dummy
:
dummy
:
$(
error
This makefile is not meant to build directly, but instead included
in
other Makefiles that
set
required variables
)
$(
error
This makefile is not meant to build directly, but instead included
in
other Makefiles that
set
required variables
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment