Unverified commit 976ef2b8, authored by hclearner and committed by GitHub
Browse files

Update local_exchange.cuh

parent 2a77b772
...@@ -28,7 +28,7 @@ void fmoe_cuda_assign_pos_impl( ...@@ -28,7 +28,7 @@ void fmoe_cuda_assign_pos_impl(
#define PERTHREAD_EXPERTS 256 #define PERTHREAD_EXPERTS 256
#ifdef MOE_HIP_DIFF #ifdef FMOE_USE_HIP
#define WARP_SIZE 64 #define WARP_SIZE 64
#else #else
#define WARP_SIZE 32 #define WARP_SIZE 32
...@@ -57,7 +57,7 @@ void expert_count_kernel(const long* gate_idx, int* expert_count, ...@@ -57,7 +57,7 @@ void expert_count_kernel(const long* gate_idx, int* expert_count,
int x = res_tmp[i - expert_min]; int x = res_tmp[i - expert_min];
#pragma unroll #pragma unroll
for (int j = 1; j < WARP_SIZE; j <<= 1) { for (int j = 1; j < WARP_SIZE; j <<= 1) {
#ifdef MOE_HIP_DIFF #ifdef FMOE_USE_HIP
x = x + __shfl_down(x, j); x = x + __shfl_down(x, j);
#else #else
x = x + __shfl_down_sync(-1u, x, j); x = x + __shfl_down_sync(-1u, x, j);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment