Move atomic_add function to kunlun_common.h

0fdaa522 · zhangyue · 4d745cf9 · 0fdaa522 · 0fdaa522
Commit 0fdaa522 authored Apr 03, 2025 by zhangyue
Showing with 11 additions and 11 deletions

src/infiniop/devices/kunlun/kunlun_common.h src/infiniop/devices/kunlun/kunlun_common.h +11 -1

src/infiniop/ops/rms_norm/kunlun/rms_norm_kernel.xpu src/infiniop/ops/rms_norm/kunlun/rms_norm_kernel.xpu +0 -10

No files found.
--- a/src/infiniop/devices/kunlun/kunlun_common.h
+++ b/src/infiniop/devices/kunlun/kunlun_common.h
@@ -13,4 +13,14 @@ static inline __device__ float lowerBitMask(int i) {
    return (1 << (i + 1)) - 1;
 }

-#endif
\ No newline at end of file
+// Atomic add for reduce: adds `value` to the float at `ptr`, retrying until the write-back reports success
+static inline __device__ void atomic_add(__shared_ptr__ float *ptr, float value) {
+    int fail = 1; // start non-zero so the loop body runs at least once
+    while (fail) { // repeat for as long as the last write-back reported failure
+        float a = SM2REG_atomic(ptr); // fetch the value at ptr (presumably an atomic shared-memory read; confirm against XPU docs)
+        a = a + value; // candidate sum to write back
+        fail = REG2SM_atomic(ptr, a); // return value is used as the failure flag: non-zero means retry
+    }
+}
+
+#endif
--- a/src/infiniop/ops/rms_norm/kunlun/rms_norm_kernel.xpu
+++ b/src/infiniop/ops/rms_norm/kunlun/rms_norm_kernel.xpu
@@ -26,16 +26,6 @@ static inline __device__ void elementMul(float *x, float *w, float *y, int count
    }
 }

-// Atomic add for reduce: adds `value` to the float at `ptr`, retrying until the write-back reports success
-static inline __device__ void atomic_add(__shared_ptr__ float *ptr, float value) {
-    int fail = 1; // start non-zero so the loop body runs at least once
-    while (fail) { // repeat for as long as the last write-back reported failure
-        float a = SM2REG_atomic(ptr); // fetch the value at ptr (presumably an atomic shared-memory read; confirm against XPU docs)
-        a = a + value; // candidate sum to write back
-        fail = REG2SM_atomic(ptr, a); // return value is used as the failure flag: non-zero means retry
-    }
-}
-
 // RmsNorm main kernel func
 // kunlun2 has 8 cluster and 64 core
 // Call it by rmsnorm<<<8, 32, stream>>>()