Merge pull request #303 from pengcheng888/issue/302

issue/302 - fix compile error of window system

Merge pull request #303 from pengcheng888/issue/302
issue/302 - fix compile error of window system
301cc55c · PanZezhong1725 · GitHub · 23077c42 · b48d60ee · 301cc55c
Unverified Commit 301cc55c authored Jul 08, 2025 by PanZezhong1725 Committed by GitHub Jul 08, 2025
5 changed files
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]

 1. 项目配置

+   windows系统上，建议使用`xmake v2.8.9`编译项目。
   - 查看当前配置

     ```shell
@@ -73,6 +74,8 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
     ```shell
     # 英伟达
     # 可以指定 CUDA 路径， 一般环境变量为 `CUDA_HOME` 或者 `CUDA_ROOT`
+     # window系统：--cuda="%CUDA_HOME%"
+     # linux系统：--cuda=$CUDA_HOME
     xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv

     # 寒武纪

--- a/src/infiniop/ops/random_sample/cuda/random_sample_kernel.cuh
+++ b/src/infiniop/ops/random_sample/cuda/random_sample_kernel.cuh
@@ -193,7 +193,7 @@ struct Algo {
        argMax_(
            kv_pair,
            logits,
-            n,
+            static_cast<int>(n),
            workspace,
            workspace_size, stream);
        castIdx<<<1, 1, 0, stream>>>((Tidx *)result, kv_pair);
@@ -232,20 +232,20 @@ struct Algo {
        auto block = cub::Min()((size_t)block_size, n);
        auto grid = (n + block - 1) / block;
        // sort
-        fillIndices<<<grid, block, 0, stream>>>(indices, n);
+        fillIndices<<<static_cast<unsigned int>(grid), static_cast<unsigned int>(block), 0, stream>>>(indices, static_cast<int>(n));
        CHECK_CUDA(radixSort(
            workspace_, workspace_size,
            logits, sorted,
            indices, indices_out,
-            n,
+            static_cast<int>(n),
            stream));
        // softmax
-        partialSoftmaxKernel<<<grid, block, 0, stream>>>(sorted, n, temperature);
+        partialSoftmaxKernel<<<static_cast<unsigned int>(grid), static_cast<unsigned int>(block), 0, stream>>>(sorted, static_cast<int>(n), temperature);
        setSoftmaxMaxKernel<<<1, 1, 0, stream>>>(sorted);
        // sum
        CHECK_CUDA(inclusiveSum(
            workspace_, workspace,
-            sorted, n,
+            sorted, static_cast<int>(n),
            stream));
        // sample
        randomSampleKernel<<<1, 1, 0, stream>>>(

--- a/src/infiniop/ops/rearrange/cuda/rearrange_cuda.cu
+++ b/src/infiniop/ops/rearrange/cuda/rearrange_cuda.cu
@@ -297,7 +297,7 @@ utils::Result<RearrangeParams> prepareRearrangeParams(const utils::RearrangeMeta
                block_len.push_back(split_dims[j].num_per_block);
                src_block_stride.push_back(dims[i].src_stride);
                dst_block_stride.push_back(dims[i].dst_stride);
-                split_dims[j].array_struct_idx_block = block_dim;
+                split_dims[j].array_struct_idx_block = static_cast<int>(block_dim);
                block_dim += 1;
                block_len_total *= split_dims[j].num_per_block;
            }
@@ -316,7 +316,7 @@ utils::Result<RearrangeParams> prepareRearrangeParams(const utils::RearrangeMeta
                    grid_len.push_back(split_dims[j].num_per_grid);
                    src_grid_stride.push_back(dims[i].src_stride * split_dims[j].num_per_block);
                    dst_grid_stride.push_back(dims[i].dst_stride * split_dims[j].num_per_block);
-                    split_dims[j].array_struct_idx_grid = grid_len.size() - 1;
+                    split_dims[j].array_struct_idx_grid = static_cast<int>(grid_len.size() - 1);
                }
            }

@@ -420,7 +420,7 @@ infiniStatus_t launchKernel(

    CHECK_OR_RETURN(cudaLaunchKernel(
                        kernel_func,
-                        grid_size, BLOCK_SIZE,
+                        static_cast<unsigned int>(grid_size), static_cast<unsigned int>(BLOCK_SIZE),
                        args, 0, stream)
                        == cudaSuccess,
                    INFINI_STATUS_INTERNAL_ERROR);

--- a/src/infiniop/ops/rearrange/cuda/rearrange_kernel.cuh
+++ b/src/infiniop/ops/rearrange/cuda/rearrange_kernel.cuh
@@ -63,13 +63,13 @@ struct Constraint {
                size_t remaining                                                                                                                                                 \
                    = blockIdx.x;                                                                                                                                                \
                                                                                                                                                                                 \
-                for (ssize_t i = grid_array_size - 1; i >= 0; i--) {                                                                                                             \
+                for (ptrdiff_t i = grid_array_size - 1; i >= 0; i--) {                                                                                                           \
                    size_t idx = remaining % grid_len.a[i];                                                                                                                      \
                    remaining /= grid_len.a[i];                                                                                                                                  \
                    src_offset += idx * src_grid_stride.a[i];                                                                                                                    \
                    dst_offset += idx * dst_grid_stride.a[i];                                                                                                                    \
                    if (constraint_num > 0) {                                                                                                                                    \
-                        for (ssize_t j = 0; j < constraint_num; j++) {                                                                                                           \
+                        for (ptrdiff_t j = 0; j < constraint_num; j++) {                                                                                                         \
                            if (i == constraints.a[j].grid_idx) {                                                                                                                \
                                constraints_grid_idx_multiple[j] = idx * constraints.a[j].grid_div_block;                                                                        \
                            }                                                                                                                                                    \
@@ -80,7 +80,7 @@ struct Constraint {
                /* 将结果存入共享内存 */                                                                                                                                \
                shared_src_offset = src_offset;                                                                                                                                  \
                shared_dst_offset = dst_offset;                                                                                                                                  \
-                for (ssize_t j = 0; j < constraint_num; j++) {                                                                                                                   \
+                for (ptrdiff_t j = 0; j < constraint_num; j++) {                                                                                                                 \
                    shared_constraints_grid_idx_multiple[j] = constraints_grid_idx_multiple[j];                                                                                  \
                }                                                                                                                                                                \
            }                                                                                                                                                                    \
@@ -92,18 +92,18 @@ struct Constraint {
            ptrdiff_t src_offset = shared_src_offset;                                                                                                                            \
            ptrdiff_t dst_offset = shared_dst_offset;                                                                                                                            \
            ARRAY_TYPE_SIZE constraints_grid_idx_multiple[constraint_num > 0 ? constraint_num : 1];                                                                              \
-            for (ssize_t j = 0; j < constraint_num; j++) {                                                                                                                       \
+            for (ptrdiff_t j = 0; j < constraint_num; j++) {                                                                                                                     \
                constraints_grid_idx_multiple[j] = shared_constraints_grid_idx_multiple[j];                                                                                      \
            }                                                                                                                                                                    \
                                                                                                                                                                                 \
-            for (ssize_t i = block_array_size - 1; i >= 0; i--) {                                                                                                                \
+            for (ptrdiff_t i = block_array_size - 1; i >= 0; i--) {                                                                                                              \
                size_t idx = remaining % block_len.a[i];                                                                                                                         \
                remaining /= block_len.a[i];                                                                                                                                     \
                /* 计算偏移量 */                                                                                                                                            \
                src_offset += idx * src_block_stride.a[i];                                                                                                                       \
                dst_offset += idx * dst_block_stride.a[i];                                                                                                                       \
                if (constraint_num > 0) {                                                                                                                                        \
-                    for (ssize_t j = 0; j < constraint_num; j++) {                                                                                                               \
+                    for (ptrdiff_t j = 0; j < constraint_num; j++) {                                                                                                             \
                        if (i == constraints.a[j].block_idx) {                                                                                                                   \
                            if (constraints_grid_idx_multiple[j] + idx >= constraints.a[j].total_len) {                                                                          \
                                return;                                                                                                                                          \
@@ -115,7 +115,7 @@ struct Constraint {
                                                                                                                                                                                 \
            src_offset += remaining * src_block_stride.a[0];                                                                                                                     \
            dst_offset += remaining * dst_block_stride.a[0];                                                                                                                     \
-            for (ssize_t j = 0; j < constraint_num; j++) {                                                                                                                       \
+            for (ptrdiff_t j = 0; j < constraint_num; j++) {                                                                                                                     \
                if (0 == constraints.a[j].block_idx) {                                                                                                                           \
                    if (constraints_grid_idx_multiple[j] + remaining >= constraints.a[j].total_len) {                                                                            \
                        return;                                                                                                                                                  \
@@ -133,7 +133,7 @@ struct Constraint {
                ptrdiff_t dst_offset = 0;                                                                                                                                        \
                size_t remaining = blockIdx.x;                                                                                                                                   \
                                                                                                                                                                                 \
-                for (ssize_t i = grid_array_size - 1; i >= 0; i--) {                                                                                                             \
+                for (ptrdiff_t i = grid_array_size - 1; i >= 0; i--) {                                                                                                           \
                    size_t idx = remaining % grid_len.a[i];                                                                                                                      \
                    remaining /= grid_len.a[i];                                                                                                                                  \
                    src_offset += idx * src_grid_stride.a[i];                                                                                                                    \
@@ -152,7 +152,7 @@ struct Constraint {
            ptrdiff_t src_offset = shared_src_offset;                                                                                                                            \
            ptrdiff_t dst_offset = shared_dst_offset;                                                                                                                            \
                                                                                                                                                                                 \
-            for (ssize_t i = block_array_size - 1; i > 0; i--) {                                                                                                                 \
+            for (ptrdiff_t i = block_array_size - 1; i > 0; i--) {                                                                                                               \
                size_t idx = remaining % block_len.a[i];                                                                                                                         \
                remaining /= block_len.a[i];                                                                                                                                     \
                /* 计算偏移量 */                                                                                                                                            \

--- a/xmake.lua
+++ b/xmake.lua
@@ -12,6 +12,12 @@ if is_mode("debug") then
    add_defines("DEBUG_MODE")
 end

+if is_plat("windows") then
+    set_runtimes("MD")
+    add_ldflags("/utf-8", {force = true})
+    add_cxflags("/utf-8", {force = true})
+end
+
 -- CPU
 option("cpu")
    set_default(true)