Commit 84e5b6ff authored by xuxzh1 🎱
Browse files

Adapt to Ollama version 0.5.7, which supports DeepSeek-R1 inference

parent a420a453
This diff is collapsed.
...@@ -22,7 +22,7 @@ import ( ...@@ -22,7 +22,7 @@ import (
// Discovery logic for AMD/ROCm GPUs // Discovery logic for AMD/ROCm GPUs
const ( const (
DriverVersionFile = "/sys/module/amdgpu/version" DriverVersionFile = "/sys/module/hydcu/version"
AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/" AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/"
GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties" GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"
...@@ -42,8 +42,8 @@ const ( ...@@ -42,8 +42,8 @@ const (
var ( var (
// Used to validate if the given ROCm lib is usable // Used to validate if the given ROCm lib is usable
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here... ROCmLibGlobs = []string{"libhipblas.so.0.1","rocblas"} // TODO - probably include more coverage of files here...
RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"} RocmStandardLocations = []string{"/opt/dtk/lib", "/usr/lib64"}
) )
// Gather GPU information from the amdgpu driver if any supported GPUs are detected // Gather GPU information from the amdgpu driver if any supported GPUs are detected
......
...@@ -168,7 +168,7 @@ static void mul_mat_vec_q_cuda( ...@@ -168,7 +168,7 @@ static void mul_mat_vec_q_cuda(
int64_t nwarps = 1; int64_t nwarps = 1;
int64_t rows_per_cuda_block = 1; int64_t rows_per_cuda_block = 1;
if (ggml_cuda_info().devices[id].cc < GGML_CUDA_CC_CDNA || ggml_cuda_info().devices[id].cc == GGML_CUDA_CC_RDNA1) { // NVIDIA and AMD older than RDNA2 but not CDNA if (ggml_cuda_info().devices[id].cc < 1001030) { // NVIDIA and AMD older than RDNA2 but not CDNA
switch(ncols_y) { switch(ncols_y) {
case 1: case 1:
nwarps = 4; nwarps = 4;
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include "norm.cuh" #include "norm.cuh"
template <int block_size> template <int block_size>
static __global__ void norm_f32(const float * x, float * dst, const int ncols, const float eps) { static __global__ void __launch_bounds__(1024) norm_f32(const float * x, float * dst, const int ncols, const float eps) {
const int row = blockIdx.x*blockDim.y + threadIdx.y; const int row = blockIdx.x*blockDim.y + threadIdx.y;
const int tid = threadIdx.x; const int tid = threadIdx.x;
...@@ -63,7 +63,7 @@ static __global__ void norm_f32(const float * x, float * dst, const int ncols, c ...@@ -63,7 +63,7 @@ static __global__ void norm_f32(const float * x, float * dst, const int ncols, c
} }
template <int block_size> template <int block_size>
static __global__ void group_norm_f32(const float * x, float * dst, const int group_size, const int ne_elements, const float eps) { static __global__ void __launch_bounds__(1024) group_norm_f32(const float * x, float * dst, const int group_size, const int ne_elements, const float eps) {
// blockIdx.x: num_groups idx // blockIdx.x: num_groups idx
// threadIdx.x: block_size idx // threadIdx.x: block_size idx
int start = blockIdx.x * group_size; int start = blockIdx.x * group_size;
...@@ -124,7 +124,7 @@ static __global__ void group_norm_f32(const float * x, float * dst, const int gr ...@@ -124,7 +124,7 @@ static __global__ void group_norm_f32(const float * x, float * dst, const int gr
} }
template <int block_size> template <int block_size>
static __global__ void rms_norm_f32(const float * x, float * dst, const int ncols, const float eps) { static __global__ void __launch_bounds__(1024) rms_norm_f32(const float * x, float * dst, const int ncols, const float eps) {
const int row = blockIdx.x*blockDim.y + threadIdx.y; const int row = blockIdx.x*blockDim.y + threadIdx.y;
const int tid = threadIdx.x; const int tid = threadIdx.x;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
include make/common-defs.make include make/common-defs.make
include make/rocm-defs.make include make/rocm-defs.make
HIP_ARCHS_COMMON := gfx900 gfx940 gfx941 gfx942 gfx1010 gfx1012 gfx1030 gfx1100 gfx1101 gfx1102 HIP_ARCHS_COMMON := gfx928 gfx906
HIP_ARCHS_LINUX := gfx906:xnack- gfx908:xnack- gfx90a:xnack+ gfx90a:xnack- HIP_ARCHS_LINUX := gfx928 gfx906
ifeq ($(OS),windows) ifeq ($(OS),windows)
GPU_LIB_DIR := $(shell cygpath -m -s "$(HIP_PATH)/bin") GPU_LIB_DIR := $(shell cygpath -m -s "$(HIP_PATH)/bin")
...@@ -88,8 +88,8 @@ GPU_COMPILER_CUFLAGS = \ ...@@ -88,8 +88,8 @@ GPU_COMPILER_CUFLAGS = \
-DUSE_PROF_API=1 \ -DUSE_PROF_API=1 \
-std=gnu++17 \ -std=gnu++17 \
-x hip \ -x hip \
-mllvm=-amdgpu-early-inline-all=true \ #-mllvm=-amdgpu-early-inline-all=true \
-mllvm=-amdgpu-function-calls=false \ #-mllvm=-amdgpu-function-calls=false \
-Wno-expansion-to-defined \ -Wno-expansion-to-defined \
-Wno-invalid-noreturn \ -Wno-invalid-noreturn \
-Wno-ignored-attributes \ -Wno-ignored-attributes \
...@@ -111,7 +111,7 @@ $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_s ...@@ -111,7 +111,7 @@ $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_s
$(ROCBLAS_DIST_DEP_MANIFEST): $(ROCBLAS_DIST_DEP_MANIFEST):
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
@echo "Copying rocblas library..." @echo "Copying rocblas library..."
(cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . ) | (cd $(dir $@) && tar xf - ) cd $(HIP_PATH)/../rocblas/lib/ && tar cf - . | (cd $(dir $@) && tar xf - )
@echo "rocblas library copy complete" @echo "rocblas library copy complete"
$(GPU_DIST_TRANSITIVE_LIB_DEPS): $(GPU_DIST_TRANSITIVE_LIB_DEPS):
......
# Generalized GPU runner build # Generalized GPU runner build
INCLUDES = -I$(CURDIR)/llama
GPU_COMPILER_CFLAGS += $(INCLUDES)
ifndef GPU_RUNNER_NAME ifndef GPU_RUNNER_NAME
dummy: dummy:
$(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables) $(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment