Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
cf719ba7
Commit
cf719ba7
authored
Apr 30, 2025
by
zhangyue
Browse files
issue/209: ptrdiff_t -> _ptrdiff_t
parent
f4a1754c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
20 deletions
+16
-20
src/infiniop/devices/kunlun/kunlun_kernel_common.h
src/infiniop/devices/kunlun/kunlun_kernel_common.h
+3
-3
src/infiniop/devices/kunlun/kunlun_kernel_dtype.h
src/infiniop/devices/kunlun/kunlun_kernel_dtype.h
+2
-3
src/infiniop/elementwise/kunlun/elementwise_kunlun.h
src/infiniop/elementwise/kunlun/elementwise_kunlun.h
+0
-1
src/infiniop/elementwise/kunlun/elementwise_kunlun_kernel.h
src/infiniop/elementwise/kunlun/elementwise_kunlun_kernel.h
+11
-13
No files found.
src/infiniop/devices/kunlun/kunlun_kernel_common.h
View file @
cf719ba7
...
@@ -31,8 +31,8 @@ inline __device__ void atomicAddF32(__shared_ptr__ float *ptr, float value) {
...
@@ -31,8 +31,8 @@ inline __device__ void atomicAddF32(__shared_ptr__ float *ptr, float value) {
inline
__device__
size_t
indexToReducedOffset
(
inline
__device__
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
flat_index
,
size_t
ndim
,
size_t
ndim
,
const
ptrdiff_t
*
broadcasted_strides
,
const
_
ptrdiff_t
*
broadcasted_strides
,
const
ptrdiff_t
*
target_strides
)
{
const
_
ptrdiff_t
*
target_strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
...
@@ -47,7 +47,7 @@ inline __device__ size_t indexToOffset(
...
@@ -47,7 +47,7 @@ inline __device__ size_t indexToOffset(
size_t
flat_index
,
size_t
flat_index
,
size_t
ndim
,
size_t
ndim
,
const
_size_t
*
shape
,
const
_size_t
*
shape
,
const
ptrdiff_t
*
strides
)
{
const
_
ptrdiff_t
*
strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
ndim
;
i
--
>
0
;)
{
for
(
size_t
i
=
ndim
;
i
--
>
0
;)
{
...
...
src/infiniop/devices/kunlun/kunlun_kernel_dtype.h
View file @
cf719ba7
...
@@ -2,17 +2,16 @@
...
@@ -2,17 +2,16 @@
#define __INFINIOP_KUNLUN_DTYPE_H__
#define __INFINIOP_KUNLUN_DTYPE_H__
#include "xpu/kernel/xtdk.h"
#include "xpu/kernel/xtdk.h"
#include "xpu/kernel/xtdk_io.h"
#include "xpu/kernel/xtdk_math.h"
#include "xpu/kernel/xtdk_math.h"
#include "xpu/kernel/xtdk_simd.h"
#include "xpu/kernel/xtdk_simd.h"
#include "xpu/runtime.h"
#include "xpu/runtime.h"
// kunlun ptrdiff_t* is used to save ptrdiff_t array
// kunlun ptrdiff_t* is used to save ptrdiff_t array
// copied from host
// copied from host
typedef
struct
ptrdiff_t
{
typedef
struct
_
ptrdiff_t
{
long
value
;
// 32 bit
long
value
;
// 32 bit
long
padding
;
// 32 bit
long
padding
;
// 32 bit
}
ptrdiff_t
;
}
_
ptrdiff_t
;
// same as ptrdiff
// same as ptrdiff
typedef
struct
_size_t
{
typedef
struct
_size_t
{
...
...
src/infiniop/elementwise/kunlun/elementwise_kunlun.h
View file @
cf719ba7
...
@@ -54,7 +54,6 @@ struct DeviceImpl::Opaque {
...
@@ -54,7 +54,6 @@ struct DeviceImpl::Opaque {
reinterpret_cast
<
const
void
*
const
*>
(
d_inputs_arr
),
reinterpret_cast
<
const
void
*
const
*>
(
d_inputs_arr
),
stream
,
stream
,
args
...);
args
...);
// std::forward<Args>(args)...);
return
INFINI_STATUS_SUCCESS
;
return
INFINI_STATUS_SUCCESS
;
}
}
...
...
src/infiniop/elementwise/kunlun/elementwise_kunlun_kernel.h
View file @
cf719ba7
...
@@ -2,8 +2,6 @@
...
@@ -2,8 +2,6 @@
#define __INFINIOP_ELEMENTWISE_KUNLUN_XPU__
#define __INFINIOP_ELEMENTWISE_KUNLUN_XPU__
#include "../../devices/kunlun/kunlun_kernel_common.h"
#include "../../devices/kunlun/kunlun_kernel_common.h"
#include "xpu/kernel/xtdk_io.h"
// #include <cstdio>
using
namespace
device
::
kunlun
::
kernel
;
using
namespace
device
::
kunlun
::
kernel
;
...
@@ -16,8 +14,8 @@ struct InputIndexer {
...
@@ -16,8 +14,8 @@ struct InputIndexer {
const
bool
*
input_contiguous
;
const
bool
*
input_contiguous
;
const
bool
*
input_broadcasted
;
const
bool
*
input_broadcasted
;
const
_size_t
*
input_shapes
;
const
_size_t
*
input_shapes
;
const
ptrdiff_t
*
input_strides
;
const
_
ptrdiff_t
*
input_strides
;
const
ptrdiff_t
*
output_strides
;
const
_
ptrdiff_t
*
output_strides
;
__device__
size_t
operator
()(
size_t
input_id
)
const
{
__device__
size_t
operator
()(
size_t
input_id
)
const
{
return
input_contiguous
[
input_id
]
return
input_contiguous
[
input_id
]
...
@@ -43,7 +41,7 @@ getOutputIndex(size_t idx,
...
@@ -43,7 +41,7 @@ getOutputIndex(size_t idx,
bool
is_contiguous
,
bool
is_contiguous
,
size_t
ndim
,
size_t
ndim
,
const
_size_t
*
shape
,
const
_size_t
*
shape
,
const
ptrdiff_t
*
strides
)
{
const
_
ptrdiff_t
*
strides
)
{
return
is_contiguous
?
idx
:
indexToOffset
(
idx
,
ndim
,
shape
,
strides
);
return
is_contiguous
?
idx
:
indexToOffset
(
idx
,
ndim
,
shape
,
strides
);
}
}
...
@@ -85,8 +83,8 @@ __global__ void elementwiseKernel(
...
@@ -85,8 +83,8 @@ __global__ void elementwiseKernel(
const
bool
*
input_broadcasted_gm
,
const
bool
*
input_broadcasted_gm
,
const
_size_t
*
output_shape_gm
,
const
_size_t
*
output_shape_gm
,
const
_size_t
*
input_shapes_gm
,
const
_size_t
*
input_shapes_gm
,
const
ptrdiff_t
*
output_strides_gm
,
const
_
ptrdiff_t
*
output_strides_gm
,
const
ptrdiff_t
*
input_strides_gm
,
const
_
ptrdiff_t
*
input_strides_gm
,
Tdata
*
output
,
Tdata
*
output
,
const
void
*
const
*
inputs
,
const
void
*
const
*
inputs
,
Args
...
args
)
{
Args
...
args
)
{
...
@@ -113,10 +111,10 @@ __global__ void elementwiseKernel(
...
@@ -113,10 +111,10 @@ __global__ void elementwiseKernel(
__local__
bool
input_broadcasted
[
N
];
__local__
bool
input_broadcasted
[
N
];
// Input shape/strides
// Input shape/strides
__local__
_size_t
input_shapes
[
N
*
ndim
];
__local__
_size_t
input_shapes
[
N
*
ndim
];
__local__
ptrdiff_t
input_strides
[
N
*
ndim
];
__local__
_
ptrdiff_t
input_strides
[
N
*
ndim
];
// Output shape/strides
// Output shape/strides
__local__
_size_t
output_shape
[
ndim
];
__local__
_size_t
output_shape
[
ndim
];
__local__
ptrdiff_t
output_strides
[
ndim
];
__local__
_
ptrdiff_t
output_strides
[
ndim
];
// Inputs gm ptr buf
// Inputs gm ptr buf
__local__
__global_ptr__
Tdata
*
typed_inputs_ptr
[
N
];
__local__
__global_ptr__
Tdata
*
typed_inputs_ptr
[
N
];
...
@@ -124,9 +122,9 @@ __global__ void elementwiseKernel(
...
@@ -124,9 +122,9 @@ __global__ void elementwiseKernel(
GM2LM_ASYNC
(
input_contiguous_gm
,
input_contiguous
,
N
*
sizeof
(
bool
));
GM2LM_ASYNC
(
input_contiguous_gm
,
input_contiguous
,
N
*
sizeof
(
bool
));
GM2LM_ASYNC
(
input_broadcasted_gm
,
input_broadcasted
,
N
*
sizeof
(
bool
));
GM2LM_ASYNC
(
input_broadcasted_gm
,
input_broadcasted
,
N
*
sizeof
(
bool
));
GM2LM_ASYNC
(
input_shapes_gm
,
input_shapes
,
N
*
ndim
*
sizeof
(
_size_t
));
GM2LM_ASYNC
(
input_shapes_gm
,
input_shapes
,
N
*
ndim
*
sizeof
(
_size_t
));
GM2LM_ASYNC
(
input_strides_gm
,
input_strides
,
N
*
ndim
*
sizeof
(
ptrdiff_t
));
GM2LM_ASYNC
(
input_strides_gm
,
input_strides
,
N
*
ndim
*
sizeof
(
_
ptrdiff_t
));
GM2LM_ASYNC
(
output_shape_gm
,
output_shape
,
ndim
*
sizeof
(
_size_t
));
GM2LM_ASYNC
(
output_shape_gm
,
output_shape
,
ndim
*
sizeof
(
_size_t
));
GM2LM_ASYNC
(
output_strides_gm
,
output_strides
,
ndim
*
sizeof
(
ptrdiff_t
));
GM2LM_ASYNC
(
output_strides_gm
,
output_strides
,
ndim
*
sizeof
(
_
ptrdiff_t
));
GM2LM_ASYNC
(
typed_inputs
,
typed_inputs_ptr
,
N
*
sizeof
(
__global_ptr__
Tdata
*
));
GM2LM_ASYNC
(
typed_inputs
,
typed_inputs_ptr
,
N
*
sizeof
(
__global_ptr__
Tdata
*
));
mfence
();
mfence
();
...
@@ -173,8 +171,8 @@ __global__ void elementwiseKernel(
...
@@ -173,8 +171,8 @@ __global__ void elementwiseKernel(
reinterpret_cast<const bool *>(input_broadcasted), \
reinterpret_cast<const bool *>(input_broadcasted), \
reinterpret_cast<const _size_t *>(output_shape), \
reinterpret_cast<const _size_t *>(output_shape), \
reinterpret_cast<const _size_t *>(input_shapes), \
reinterpret_cast<const _size_t *>(input_shapes), \
reinterpret_cast<const ptrdiff_t *>(output_strides), \
reinterpret_cast<const
_
ptrdiff_t *>(output_strides), \
reinterpret_cast<const ptrdiff_t *>(input_strides), \
reinterpret_cast<const
_
ptrdiff_t *>(input_strides), \
reinterpret_cast<Tdata *>(output), inputs, args...); \
reinterpret_cast<Tdata *>(output), inputs, args...); \
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment