Commit b452da57 authored by xuxzh1 🎱
Browse files

[update] common.cuh && quantize.cu

parent ac4166cb
...@@ -44,7 +44,6 @@ go env -w GOPROXY=https://goproxy.cn,direct ...@@ -44,7 +44,6 @@ go env -w GOPROXY=https://goproxy.cn,direct
export LIBRARY_PATH=/opt/dtk/lib:$LIBRARY_PATH export LIBRARY_PATH=/opt/dtk/lib:$LIBRARY_PATH
cmake -B build cmake -B build
cmake --build build cmake --build build
go build .
``` ```
## 运行 ## 运行
......
...@@ -384,7 +384,7 @@ static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half ...@@ -384,7 +384,7 @@ static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half
static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, int c) { static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, int c) {
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(RDNA2) #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(RDNA2) || true
c = __builtin_amdgcn_sdot4(a, b, c, false); c = __builtin_amdgcn_sdot4(a, b, c, false);
#elif defined(RDNA3) #elif defined(RDNA3)
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false); c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
......
...@@ -33,8 +33,11 @@ static __global__ void quantize_q8_1(const float * __restrict__ x, void * __rest ...@@ -33,8 +33,11 @@ static __global__ void quantize_q8_1(const float * __restrict__ x, void * __rest
return; return;
} }
reinterpret_cast<half&>(y[ib].ds.x) = d; ggml_half2 ds = {d, sum};
reinterpret_cast<half&>(y[ib].ds.y) = sum; y[ib].ds = ds;
// reinterpret_cast<half&>(y[ib].ds.x) = d;
// reinterpret_cast<half&>(y[ib].ds.y) = sum;
} }
template <mmq_q8_1_ds_layout ds_layout> template <mmq_q8_1_ds_layout ds_layout>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment