Commit 84e5b6ff authored by xuxzh1 🎱
Browse files

Adapt to Ollama version 0.5.7, which supports DeepSeek-R1 inference

parent a420a453
This diff is collapsed.
......@@ -22,7 +22,7 @@ import (
// Discovery logic for AMD/ROCm GPUs
const (
DriverVersionFile = "/sys/module/amdgpu/version"
DriverVersionFile = "/sys/module/hydcu/version"
AMDNodesSysfsDir = "/sys/class/kfd/kfd/topology/nodes/"
GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"
......@@ -42,8 +42,8 @@ const (
var (
// Used to validate if the given ROCm lib is usable
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
ROCmLibGlobs = []string{"libhipblas.so.0.1","rocblas"} // TODO - probably include more coverage of files here...
RocmStandardLocations = []string{"/opt/dtk/lib", "/usr/lib64"}
)
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
......
......@@ -168,7 +168,7 @@ static void mul_mat_vec_q_cuda(
int64_t nwarps = 1;
int64_t rows_per_cuda_block = 1;
if (ggml_cuda_info().devices[id].cc < GGML_CUDA_CC_CDNA || ggml_cuda_info().devices[id].cc == GGML_CUDA_CC_RDNA1) { // NVIDIA and AMD older than RDNA2 but not CDNA
if (ggml_cuda_info().devices[id].cc < 1001030) { // NVIDIA and AMD older than RDNA2 but not CDNA
switch(ncols_y) {
case 1:
nwarps = 4;
......
......@@ -27,7 +27,7 @@
#include "norm.cuh"
template <int block_size>
static __global__ void norm_f32(const float * x, float * dst, const int ncols, const float eps) {
static __global__ void __launch_bounds__(1024) norm_f32(const float * x, float * dst, const int ncols, const float eps) {
const int row = blockIdx.x*blockDim.y + threadIdx.y;
const int tid = threadIdx.x;
......@@ -63,7 +63,7 @@ static __global__ void norm_f32(const float * x, float * dst, const int ncols, c
}
template <int block_size>
static __global__ void group_norm_f32(const float * x, float * dst, const int group_size, const int ne_elements, const float eps) {
static __global__ void __launch_bounds__(1024) group_norm_f32(const float * x, float * dst, const int group_size, const int ne_elements, const float eps) {
// blockIdx.x: num_groups idx
// threadIdx.x: block_size idx
int start = blockIdx.x * group_size;
......@@ -124,7 +124,7 @@ static __global__ void group_norm_f32(const float * x, float * dst, const int gr
}
template <int block_size>
static __global__ void rms_norm_f32(const float * x, float * dst, const int ncols, const float eps) {
static __global__ void __launch_bounds__(1024) rms_norm_f32(const float * x, float * dst, const int ncols, const float eps) {
const int row = blockIdx.x*blockDim.y + threadIdx.y;
const int tid = threadIdx.x;
......
......@@ -6,8 +6,8 @@
include make/common-defs.make
include make/rocm-defs.make
HIP_ARCHS_COMMON := gfx900 gfx940 gfx941 gfx942 gfx1010 gfx1012 gfx1030 gfx1100 gfx1101 gfx1102
HIP_ARCHS_LINUX := gfx906:xnack- gfx908:xnack- gfx90a:xnack+ gfx90a:xnack-
HIP_ARCHS_COMMON := gfx928 gfx906
HIP_ARCHS_LINUX := gfx928 gfx906
ifeq ($(OS),windows)
GPU_LIB_DIR := $(shell cygpath -m -s "$(HIP_PATH)/bin")
......@@ -88,8 +88,8 @@ GPU_COMPILER_CUFLAGS = \
-DUSE_PROF_API=1 \
-std=gnu++17 \
-x hip \
-mllvm=-amdgpu-early-inline-all=true \
-mllvm=-amdgpu-function-calls=false \
#-mllvm=-amdgpu-early-inline-all=true \
#-mllvm=-amdgpu-function-calls=false \
-Wno-expansion-to-defined \
-Wno-invalid-noreturn \
-Wno-ignored-attributes \
......@@ -111,7 +111,7 @@ $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_s
$(ROCBLAS_DIST_DEP_MANIFEST):
@-mkdir -p $(dir $@)
@echo "Copying rocblas library..."
(cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . ) | (cd $(dir $@) && tar xf - )
cd $(HIP_PATH)/../rocblas/lib/ && tar cf - . | (cd $(dir $@) && tar xf - )
@echo "rocblas library copy complete"
$(GPU_DIST_TRANSITIVE_LIB_DEPS):
......
# Generalized GPU runner build
INCLUDES = -I$(CURDIR)/llama
GPU_COMPILER_CFLAGS += $(INCLUDES)
ifndef GPU_RUNNER_NAME
dummy:
$(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment