Unverified Commit 301cc55c authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #303 from pengcheng888/issue/302

issue/302 - fix compile errors on Windows
parents 23077c42 b48d60ee
......@@ -56,6 +56,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
1. 项目配置
在 Windows 系统上，建议使用 `xmake v2.8.9` 编译项目。
- 查看当前配置
```shell
......@@ -73,6 +74,8 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
```shell
# 英伟达
# 可以指定 CUDA 路径, 一般环境变量为 `CUDA_HOME` 或者 `CUDA_ROOT`
# Windows 系统：--cuda="%CUDA_HOME%"
# Linux 系统：--cuda=$CUDA_HOME
xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv
# 寒武纪
......
......@@ -193,7 +193,7 @@ struct Algo {
argMax_(
kv_pair,
logits,
n,
static_cast<int>(n),
workspace,
workspace_size, stream);
castIdx<<<1, 1, 0, stream>>>((Tidx *)result, kv_pair);
......@@ -232,20 +232,20 @@ struct Algo {
auto block = cub::Min()((size_t)block_size, n);
auto grid = (n + block - 1) / block;
// sort
fillIndices<<<grid, block, 0, stream>>>(indices, n);
fillIndices<<<static_cast<unsigned int>(grid), static_cast<unsigned int>(block), 0, stream>>>(indices, static_cast<int>(n));
CHECK_CUDA(radixSort(
workspace_, workspace_size,
logits, sorted,
indices, indices_out,
n,
static_cast<int>(n),
stream));
// softmax
partialSoftmaxKernel<<<grid, block, 0, stream>>>(sorted, n, temperature);
partialSoftmaxKernel<<<static_cast<unsigned int>(grid), static_cast<unsigned int>(block), 0, stream>>>(sorted, static_cast<int>(n), temperature);
setSoftmaxMaxKernel<<<1, 1, 0, stream>>>(sorted);
// sum
CHECK_CUDA(inclusiveSum(
workspace_, workspace,
sorted, n,
sorted, static_cast<int>(n),
stream));
// sample
randomSampleKernel<<<1, 1, 0, stream>>>(
......
......@@ -297,7 +297,7 @@ utils::Result<RearrangeParams> prepareRearrangeParams(const utils::RearrangeMeta
block_len.push_back(split_dims[j].num_per_block);
src_block_stride.push_back(dims[i].src_stride);
dst_block_stride.push_back(dims[i].dst_stride);
split_dims[j].array_struct_idx_block = block_dim;
split_dims[j].array_struct_idx_block = static_cast<int>(block_dim);
block_dim += 1;
block_len_total *= split_dims[j].num_per_block;
}
......@@ -316,7 +316,7 @@ utils::Result<RearrangeParams> prepareRearrangeParams(const utils::RearrangeMeta
grid_len.push_back(split_dims[j].num_per_grid);
src_grid_stride.push_back(dims[i].src_stride * split_dims[j].num_per_block);
dst_grid_stride.push_back(dims[i].dst_stride * split_dims[j].num_per_block);
split_dims[j].array_struct_idx_grid = grid_len.size() - 1;
split_dims[j].array_struct_idx_grid = static_cast<int>(grid_len.size() - 1);
}
}
......@@ -420,7 +420,7 @@ infiniStatus_t launchKernel(
CHECK_OR_RETURN(cudaLaunchKernel(
kernel_func,
grid_size, BLOCK_SIZE,
static_cast<unsigned int>(grid_size), static_cast<unsigned int>(BLOCK_SIZE),
args, 0, stream)
== cudaSuccess,
INFINI_STATUS_INTERNAL_ERROR);
......
......@@ -63,13 +63,13 @@ struct Constraint {
size_t remaining \
= blockIdx.x; \
\
for (ssize_t i = grid_array_size - 1; i >= 0; i--) { \
for (ptrdiff_t i = grid_array_size - 1; i >= 0; i--) { \
size_t idx = remaining % grid_len.a[i]; \
remaining /= grid_len.a[i]; \
src_offset += idx * src_grid_stride.a[i]; \
dst_offset += idx * dst_grid_stride.a[i]; \
if (constraint_num > 0) { \
for (ssize_t j = 0; j < constraint_num; j++) { \
for (ptrdiff_t j = 0; j < constraint_num; j++) { \
if (i == constraints.a[j].grid_idx) { \
constraints_grid_idx_multiple[j] = idx * constraints.a[j].grid_div_block; \
} \
......@@ -80,7 +80,7 @@ struct Constraint {
/* 将结果存入共享内存 */ \
shared_src_offset = src_offset; \
shared_dst_offset = dst_offset; \
for (ssize_t j = 0; j < constraint_num; j++) { \
for (ptrdiff_t j = 0; j < constraint_num; j++) { \
shared_constraints_grid_idx_multiple[j] = constraints_grid_idx_multiple[j]; \
} \
} \
......@@ -92,18 +92,18 @@ struct Constraint {
ptrdiff_t src_offset = shared_src_offset; \
ptrdiff_t dst_offset = shared_dst_offset; \
ARRAY_TYPE_SIZE constraints_grid_idx_multiple[constraint_num > 0 ? constraint_num : 1]; \
for (ssize_t j = 0; j < constraint_num; j++) { \
for (ptrdiff_t j = 0; j < constraint_num; j++) { \
constraints_grid_idx_multiple[j] = shared_constraints_grid_idx_multiple[j]; \
} \
\
for (ssize_t i = block_array_size - 1; i >= 0; i--) { \
for (ptrdiff_t i = block_array_size - 1; i >= 0; i--) { \
size_t idx = remaining % block_len.a[i]; \
remaining /= block_len.a[i]; \
/* 计算偏移量 */ \
src_offset += idx * src_block_stride.a[i]; \
dst_offset += idx * dst_block_stride.a[i]; \
if (constraint_num > 0) { \
for (ssize_t j = 0; j < constraint_num; j++) { \
for (ptrdiff_t j = 0; j < constraint_num; j++) { \
if (i == constraints.a[j].block_idx) { \
if (constraints_grid_idx_multiple[j] + idx >= constraints.a[j].total_len) { \
return; \
......@@ -115,7 +115,7 @@ struct Constraint {
\
src_offset += remaining * src_block_stride.a[0]; \
dst_offset += remaining * dst_block_stride.a[0]; \
for (ssize_t j = 0; j < constraint_num; j++) { \
for (ptrdiff_t j = 0; j < constraint_num; j++) { \
if (0 == constraints.a[j].block_idx) { \
if (constraints_grid_idx_multiple[j] + remaining >= constraints.a[j].total_len) { \
return; \
......@@ -133,7 +133,7 @@ struct Constraint {
ptrdiff_t dst_offset = 0; \
size_t remaining = blockIdx.x; \
\
for (ssize_t i = grid_array_size - 1; i >= 0; i--) { \
for (ptrdiff_t i = grid_array_size - 1; i >= 0; i--) { \
size_t idx = remaining % grid_len.a[i]; \
remaining /= grid_len.a[i]; \
src_offset += idx * src_grid_stride.a[i]; \
......@@ -152,7 +152,7 @@ struct Constraint {
ptrdiff_t src_offset = shared_src_offset; \
ptrdiff_t dst_offset = shared_dst_offset; \
\
for (ssize_t i = block_array_size - 1; i > 0; i--) { \
for (ptrdiff_t i = block_array_size - 1; i > 0; i--) { \
size_t idx = remaining % block_len.a[i]; \
remaining /= block_len.a[i]; \
/* 计算偏移量 */ \
......
......@@ -12,6 +12,12 @@ if is_mode("debug") then
add_defines("DEBUG_MODE")
end
-- Windows-specific build settings.
if is_plat("windows") then
    -- Select the "MD" C/C++ runtime library for all targets.
    set_runtimes("MD")
    -- Have the MSVC compiler treat source and execution character sets as
    -- UTF-8. `force = true` passes the flag through without xmake's flag
    -- detection. `/utf-8` is a compiler-only switch, so it is deliberately
    -- not added to the linker flags (the linker does not recognize it and
    -- would report it as an unknown option).
    add_cxflags("/utf-8", {force = true})
end
-- CPU
option("cpu")
set_default(true)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment