Commit cf719ba7 authored by zhangyue
Browse files

issue/209: ptrdiff_t -> _ptrdiff_t

parent f4a1754c
...@@ -31,8 +31,8 @@ inline __device__ void atomicAddF32(__shared_ptr__ float *ptr, float value) { ...@@ -31,8 +31,8 @@ inline __device__ void atomicAddF32(__shared_ptr__ float *ptr, float value) {
inline __device__ size_t indexToReducedOffset( inline __device__ size_t indexToReducedOffset(
size_t flat_index, size_t flat_index,
size_t ndim, size_t ndim,
const ptrdiff_t *broadcasted_strides, const _ptrdiff_t *broadcasted_strides,
const ptrdiff_t *target_strides) { const _ptrdiff_t *target_strides) {
size_t res = 0; size_t res = 0;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
...@@ -47,7 +47,7 @@ inline __device__ size_t indexToOffset( ...@@ -47,7 +47,7 @@ inline __device__ size_t indexToOffset(
size_t flat_index, size_t flat_index,
size_t ndim, size_t ndim,
const _size_t *shape, const _size_t *shape,
const ptrdiff_t *strides) { const _ptrdiff_t *strides) {
size_t res = 0; size_t res = 0;
for (size_t i = ndim; i-- > 0;) { for (size_t i = ndim; i-- > 0;) {
......
...@@ -2,17 +2,16 @@ ...@@ -2,17 +2,16 @@
#define __INFINIOP_KUNLUN_DTYPE_H__ #define __INFINIOP_KUNLUN_DTYPE_H__
#include "xpu/kernel/xtdk.h" #include "xpu/kernel/xtdk.h"
#include "xpu/kernel/xtdk_io.h"
#include "xpu/kernel/xtdk_math.h" #include "xpu/kernel/xtdk_math.h"
#include "xpu/kernel/xtdk_simd.h" #include "xpu/kernel/xtdk_simd.h"
#include "xpu/runtime.h" #include "xpu/runtime.h"
// kunlun ptrdiff_t* is used to save ptrdiff_t array // kunlun ptrdiff_t* is used to save ptrdiff_t array
// copied from host // copied from host
typedef struct ptrdiff_t { typedef struct _ptrdiff_t {
long value; // 32 bit long value; // 32 bit
long padding; // 32 bit long padding; // 32 bit
} ptrdiff_t; } _ptrdiff_t;
// same as ptrdiff // same as ptrdiff
typedef struct _size_t { typedef struct _size_t {
......
...@@ -54,7 +54,6 @@ struct DeviceImpl::Opaque { ...@@ -54,7 +54,6 @@ struct DeviceImpl::Opaque {
reinterpret_cast<const void *const *>(d_inputs_arr), reinterpret_cast<const void *const *>(d_inputs_arr),
stream, stream,
args...); args...);
// std::forward<Args>(args)...);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
#define __INFINIOP_ELEMENTWISE_KUNLUN_XPU__ #define __INFINIOP_ELEMENTWISE_KUNLUN_XPU__
#include "../../devices/kunlun/kunlun_kernel_common.h" #include "../../devices/kunlun/kunlun_kernel_common.h"
#include "xpu/kernel/xtdk_io.h"
// #include <cstdio>
using namespace device::kunlun::kernel; using namespace device::kunlun::kernel;
...@@ -16,8 +14,8 @@ struct InputIndexer { ...@@ -16,8 +14,8 @@ struct InputIndexer {
const bool *input_contiguous; const bool *input_contiguous;
const bool *input_broadcasted; const bool *input_broadcasted;
const _size_t *input_shapes; const _size_t *input_shapes;
const ptrdiff_t *input_strides; const _ptrdiff_t *input_strides;
const ptrdiff_t *output_strides; const _ptrdiff_t *output_strides;
__device__ size_t operator()(size_t input_id) const { __device__ size_t operator()(size_t input_id) const {
return input_contiguous[input_id] return input_contiguous[input_id]
...@@ -43,7 +41,7 @@ getOutputIndex(size_t idx, ...@@ -43,7 +41,7 @@ getOutputIndex(size_t idx,
bool is_contiguous, bool is_contiguous,
size_t ndim, size_t ndim,
const _size_t *shape, const _size_t *shape,
const ptrdiff_t *strides) { const _ptrdiff_t *strides) {
return is_contiguous ? idx : indexToOffset(idx, ndim, shape, strides); return is_contiguous ? idx : indexToOffset(idx, ndim, shape, strides);
} }
...@@ -85,8 +83,8 @@ __global__ void elementwiseKernel( ...@@ -85,8 +83,8 @@ __global__ void elementwiseKernel(
const bool *input_broadcasted_gm, const bool *input_broadcasted_gm,
const _size_t *output_shape_gm, const _size_t *output_shape_gm,
const _size_t *input_shapes_gm, const _size_t *input_shapes_gm,
const ptrdiff_t *output_strides_gm, const _ptrdiff_t *output_strides_gm,
const ptrdiff_t *input_strides_gm, const _ptrdiff_t *input_strides_gm,
Tdata *output, Tdata *output,
const void *const *inputs, const void *const *inputs,
Args... args) { Args... args) {
...@@ -113,10 +111,10 @@ __global__ void elementwiseKernel( ...@@ -113,10 +111,10 @@ __global__ void elementwiseKernel(
__local__ bool input_broadcasted[N]; __local__ bool input_broadcasted[N];
// Input shape/strides // Input shape/strides
__local__ _size_t input_shapes[N * ndim]; __local__ _size_t input_shapes[N * ndim];
__local__ ptrdiff_t input_strides[N * ndim]; __local__ _ptrdiff_t input_strides[N * ndim];
// Output shape/strides // Output shape/strides
__local__ _size_t output_shape[ndim]; __local__ _size_t output_shape[ndim];
__local__ ptrdiff_t output_strides[ndim]; __local__ _ptrdiff_t output_strides[ndim];
// Inputs gm ptr buf // Inputs gm ptr buf
__local__ __global_ptr__ Tdata *typed_inputs_ptr[N]; __local__ __global_ptr__ Tdata *typed_inputs_ptr[N];
...@@ -124,9 +122,9 @@ __global__ void elementwiseKernel( ...@@ -124,9 +122,9 @@ __global__ void elementwiseKernel(
GM2LM_ASYNC(input_contiguous_gm, input_contiguous, N * sizeof(bool)); GM2LM_ASYNC(input_contiguous_gm, input_contiguous, N * sizeof(bool));
GM2LM_ASYNC(input_broadcasted_gm, input_broadcasted, N * sizeof(bool)); GM2LM_ASYNC(input_broadcasted_gm, input_broadcasted, N * sizeof(bool));
GM2LM_ASYNC(input_shapes_gm, input_shapes, N * ndim * sizeof(_size_t)); GM2LM_ASYNC(input_shapes_gm, input_shapes, N * ndim * sizeof(_size_t));
GM2LM_ASYNC(input_strides_gm, input_strides, N * ndim * sizeof(ptrdiff_t)); GM2LM_ASYNC(input_strides_gm, input_strides, N * ndim * sizeof(_ptrdiff_t));
GM2LM_ASYNC(output_shape_gm, output_shape, ndim * sizeof(_size_t)); GM2LM_ASYNC(output_shape_gm, output_shape, ndim * sizeof(_size_t));
GM2LM_ASYNC(output_strides_gm, output_strides, ndim * sizeof(ptrdiff_t)); GM2LM_ASYNC(output_strides_gm, output_strides, ndim * sizeof(_ptrdiff_t));
GM2LM_ASYNC(typed_inputs, typed_inputs_ptr, N * sizeof(__global_ptr__ Tdata *)); GM2LM_ASYNC(typed_inputs, typed_inputs_ptr, N * sizeof(__global_ptr__ Tdata *));
mfence(); mfence();
...@@ -173,8 +171,8 @@ __global__ void elementwiseKernel( ...@@ -173,8 +171,8 @@ __global__ void elementwiseKernel(
reinterpret_cast<const bool *>(input_broadcasted), \ reinterpret_cast<const bool *>(input_broadcasted), \
reinterpret_cast<const _size_t *>(output_shape), \ reinterpret_cast<const _size_t *>(output_shape), \
reinterpret_cast<const _size_t *>(input_shapes), \ reinterpret_cast<const _size_t *>(input_shapes), \
reinterpret_cast<const ptrdiff_t *>(output_strides), \ reinterpret_cast<const _ptrdiff_t *>(output_strides), \
reinterpret_cast<const ptrdiff_t *>(input_strides), \ reinterpret_cast<const _ptrdiff_t *>(input_strides), \
reinterpret_cast<Tdata *>(output), inputs, args...); \ reinterpret_cast<Tdata *>(output), inputs, args...); \
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment