Commit 9351f91b (unverified), authored by TY-AMD, committed by GitHub
Browse files

[BugFix][ROCm] Fix GGUF MoE Dispatch Block_Dim for ROCm (#16247)


Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com>
parent 5a1e1c83
...@@ -129,7 +129,7 @@ static __device__ __forceinline__ void moe_q( ...@@ -129,7 +129,7 @@ static __device__ __forceinline__ void moe_q(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q4_0 64 #define MOE_X_Q4_0 8
#define MOE_Y_Q4_0 128 #define MOE_Y_Q4_0 128
#define NWARPS_Q4_0 8 #define NWARPS_Q4_0 8
#else #else
...@@ -190,7 +190,7 @@ static void ggml_moe_q4_0_q8_1_cuda( ...@@ -190,7 +190,7 @@ static void ggml_moe_q4_0_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q4_1 64 #define MOE_X_Q4_1 8
#define MOE_Y_Q4_1 128 #define MOE_Y_Q4_1 128
#define NWARPS_Q4_1 8 #define NWARPS_Q4_1 8
#else #else
...@@ -251,7 +251,7 @@ static void ggml_moe_q4_1_q8_1_cuda( ...@@ -251,7 +251,7 @@ static void ggml_moe_q4_1_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q5_0 64 #define MOE_X_Q5_0 8
#define MOE_Y_Q5_0 128 #define MOE_Y_Q5_0 128
#define NWARPS_Q5_0 8 #define NWARPS_Q5_0 8
#else #else
...@@ -312,7 +312,7 @@ static void ggml_moe_q5_0_q8_1_cuda( ...@@ -312,7 +312,7 @@ static void ggml_moe_q5_0_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q5_1 64 #define MOE_X_Q5_1 8
#define MOE_Y_Q5_1 128 #define MOE_Y_Q5_1 128
#define NWARPS_Q5_1 8 #define NWARPS_Q5_1 8
#else #else
...@@ -373,7 +373,7 @@ static void ggml_moe_q5_1_q8_1_cuda( ...@@ -373,7 +373,7 @@ static void ggml_moe_q5_1_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q8_0 64 #define MOE_X_Q8_0 8
#define MOE_Y_Q8_0 128 #define MOE_Y_Q8_0 128
#define NWARPS_Q8_0 8 #define NWARPS_Q8_0 8
#else #else
...@@ -434,7 +434,7 @@ static void ggml_moe_q8_0_q8_1_cuda( ...@@ -434,7 +434,7 @@ static void ggml_moe_q8_0_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q2_K 64 #define MOE_X_Q2_K 8
#define MOE_Y_Q2_K 128 #define MOE_Y_Q2_K 128
#define NWARPS_Q2_K 8 #define NWARPS_Q2_K 8
#else #else
...@@ -495,7 +495,7 @@ static void ggml_moe_q2_K_q8_1_cuda( ...@@ -495,7 +495,7 @@ static void ggml_moe_q2_K_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q3_K 64 #define MOE_X_Q3_K 8
#define MOE_Y_Q3_K 128 #define MOE_Y_Q3_K 128
#define NWARPS_Q3_K 8 #define NWARPS_Q3_K 8
#else #else
...@@ -556,7 +556,7 @@ static void ggml_moe_q3_K_q8_1_cuda( ...@@ -556,7 +556,7 @@ static void ggml_moe_q3_K_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q4_K 64 #define MOE_X_Q4_K 8
#define MOE_Y_Q4_K 128 #define MOE_Y_Q4_K 128
#define NWARPS_Q4_K 8 #define NWARPS_Q4_K 8
#else #else
...@@ -617,7 +617,7 @@ static void ggml_moe_q4_K_q8_1_cuda( ...@@ -617,7 +617,7 @@ static void ggml_moe_q4_K_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q5_K 64 #define MOE_X_Q5_K 8
#define MOE_Y_Q5_K 128 #define MOE_Y_Q5_K 128
#define NWARPS_Q5_K 8 #define NWARPS_Q5_K 8
#else #else
...@@ -678,7 +678,7 @@ static void ggml_moe_q5_K_q8_1_cuda( ...@@ -678,7 +678,7 @@ static void ggml_moe_q5_K_q8_1_cuda(
} }
#if defined(USE_ROCM) #if defined(USE_ROCM)
#define MOE_X_Q6_K 64 #define MOE_X_Q6_K 8
#define MOE_Y_Q6_K 128 #define MOE_Y_Q6_K 128
#define NWARPS_Q6_K 8 #define NWARPS_Q6_K 8
#else #else
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment