fix: 修复寒武纪编译cpu时的警告

0bf217c1 · PanZezhong · 7a833987 · 0bf217c1 · 0bf217c1 · 0bf217c1
Commit 0bf217c1 authored Feb 13, 2025 by PanZezhong
3 changed files
--- a/src/infiniop/devices/cpu/common_cpu.cc
+++ b/src/infiniop/devices/cpu/common_cpu.cc
 #include "./common_cpu.h"
 float f16_to_f32(uint16_t h) {
-    uint32_t sign = (h & 0x8000) << 16; // Extract the sign bit
+    uint32_t sign = (h & 0x8000) << 16;
-    int32_t exponent = (h >> 10) & 0x1F;// Extract the exponent
+    int32_t exponent = (h >> 10) & 0x1F;
-    uint32_t mantissa = h & 0x3FF;      // Extract the mantissa (fraction part)
+    uint32_t mantissa = h & 0x3FF;
-    if (exponent == 31) {// Special case for Inf and NaN
+    uint32_t f32;
+    if (exponent == 31) {
        if (mantissa != 0) {
-            // NaN: Set float32 NaN
+            f32 = sign | 0x7F800000 | (mantissa << 13);
-            uint32_t f32 = sign | 0x7F800000 | (mantissa << 13);
-            return *(float *) &f32;
        } else {
-            // Infinity
+            f32 = sign | 0x7F800000;
-            uint32_t f32 = sign | 0x7F800000;
-            return *(float *) &f32;
        }
-    } else if (exponent == 0) {// Subnormal float16 or zero
+    } else if (exponent == 0) {
        if (mantissa == 0) {
-            // Zero (positive or negative)
+            f32 = sign;
-            uint32_t f32 = sign;// Just return signed zero
-            return *(float *) &f32;
        } else {
-            // Subnormal: Convert to normalized float32
+            exponent = -14;
-            exponent = -14;                  // Set exponent for subnormal numbers
+            while ((mantissa & 0x400) == 0) {
-            while ((mantissa & 0x400) == 0) {// Normalize mantissa
                mantissa <<= 1;
                exponent--;
            }
-            mantissa &= 0x3FF;// Clear the leading 1 bit
+            mantissa &= 0x3FF;
-            uint32_t f32 = sign | ((exponent + 127) << 23) | (mantissa << 13);
+            f32 = sign | ((exponent + 127) << 23) | (mantissa << 13);
-            return *(float *) &f32;
        }
    } else {
-        // Normalized float16
+        f32 = sign | ((exponent + 127 - 15) << 23) | (mantissa << 13);
-        uint32_t f32 = sign | ((exponent + 127 - 15) << 23) | (mantissa << 13);
-        return *(float *) &f32;
    }
+    float result;
+    memcpy(&result, &f32, sizeof(result));
+    return result;
 }
 uint16_t f32_to_f16(float val) {
-    uint32_t f32 = *(uint32_t *) &val;            // Read the bits of the float32
+    uint32_t f32;
-    uint16_t sign = (f32 >> 16) & 0x8000;         // Extract the sign bit
+    memcpy(&f32, &val, sizeof(f32));      // Read the bits of the float32
-    int32_t exponent = ((f32 >> 23) & 0xFF) - 127;// Extract and de-bias the exponent
+    uint16_t sign = (f32 >> 16) & 0x8000; // Extract the sign bit
-    uint32_t mantissa = f32 & 0x7FFFFF;           // Extract the mantissa (fraction part)
+    int32_t exponent =
+        ((f32 >> 23) & 0xFF) - 127;     // Extract and de-bias the exponent
+    uint32_t mantissa = f32 & 0x7FFFFF; // Extract the mantissa (fraction part)
-    if (exponent >= 31) {// Special cases for Inf and NaN
+    if (exponent >= 31) { // Special cases for Inf and NaN
        // NaN
        if (exponent == 128 && mantissa != 0) {
            return sign | 0x7E00;
        }
        // Infinity
        return sign | 0x7C00;
-    } else if (exponent >= -14) {// Normalized case
+    } else if (exponent >= -14) { // Normalized case
        return (uint16_t)(sign | ((exponent + 15) << 10) | (mantissa >> 13));
    } else if (exponent >= -24) {
-        mantissa |= 0x800000;// Add implicit leading 1
+        mantissa |= 0x800000; // Add implicit leading 1
        mantissa >>= (-14 - exponent);
        return (uint16_t)(sign | (mantissa >> 13));
    } else {
@@ -63,7 +60,9 @@ uint16_t f32_to_f16(float val) {
    }
 }
-size_t indexToReducedOffset(size_t flat_index, size_t ndim, int64_t const *broadcasted_strides, int64_t const *target_strides) {
+size_t indexToReducedOffset(size_t flat_index, size_t ndim,
+                            int64_t const *broadcasted_strides,
+                            int64_t const *target_strides) {
    size_t res = 0;
    for (size_t i = 0; i < ndim; ++i) {
        res += flat_index / broadcasted_strides[i] * target_strides[i];
@@ -72,7 +71,8 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim, int64_t const *broad
    return res;
 }
-size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, int64_t const *strides) {
+size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
+                     int64_t const *strides) {
    size_t res = 0;
    for (size_t i = ndim; i-- >= 0;) {
        res += (flat_index % shape[i]) * strides[i];
@@ -89,11 +89,12 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) {
    return total_size;
 }
-std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads) {
+std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape,
+                                   size_t const *pads) {
    std::vector<size_t> padded_shape(ndim);
    memcpy(padded_shape.data(), shape, ndim * sizeof(size_t));
    for (size_t i = 2; i < ndim; ++i) {
        padded_shape[i] += 2 * pads[i - 2];
    }
-    return std::move(padded_shape);
+    return padded_shape;
 }
--- a/src/infiniop/ops/matmul/cpu/matmul_cpu.cc
+++ b/src/infiniop/ops/matmul/cpu/matmul_cpu.cc
@@ -46,15 +46,15 @@ infiniopStatus_t matmul_cpu(infiniopMatmulCpuDescriptor_t desc, void *c,
        std::swap(a, b);
    }
-    for (int i = 0; i < info.batch; ++i) {
+    for (size_t i = 0; i < info.batch; ++i) {
-        for (int m_ = 0; m_ < info.m; ++m_) {
+        for (size_t m_ = 0; m_ < info.m; ++m_) {
-            for (int n_ = 0; n_ < info.n; ++n_) {
+            for (size_t n_ = 0; n_ < info.n; ++n_) {
                auto c_ = reinterpret_cast<Tdata *>(c) +
                          i * info.c_matrix.stride +
                          m_ * info.c_matrix.row_stride +
                          n_ * info.c_matrix.col_stride;
                float sum = 0;
-                for (int k_ = 0; k_ < info.k; ++k_) {
+                for (size_t k_ = 0; k_ < info.k; ++k_) {
                    auto a_ = reinterpret_cast<Tdata const *>(a) +
                              i * info.a_matrix.stride +
                              m_ * info.a_matrix.row_stride +

--- a/src/infiniop/ops/matmul/operator.cc
+++ b/src/infiniop/ops/matmul/operator.cc
@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateMatmulDescriptor(
    }
 #endif
    }
-    return INFINIOP_STATUS_BAD_DEVICE;
+    return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 __C infiniopStatus_t
@@ -76,7 +76,7 @@ infiniopGetMatmulWorkspaceSize(infiniopMatmulDescriptor_t desc, size_t *size) {
    }
 #endif
    }
-    return INFINIOP_STATUS_BAD_DEVICE;
+    return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 __C infiniopStatus_t infiniopMatmul(infiniopMatmulDescriptor_t desc,
@@ -106,7 +106,7 @@ __C infiniopStatus_t infiniopMatmul(infiniopMatmulDescriptor_t desc,
                           workspace_size, c, a, b, alpha, beta, stream);
 #endif
    }
-    return INFINIOP_STATUS_BAD_DEVICE;
+    return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }
 __C infiniopStatus_t
@@ -134,5 +134,5 @@ infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc) {
    }
 #endif
    }
-    return INFINIOP_STATUS_BAD_DEVICE;
+    return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 }