Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
64849b43
"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "02671abb44c8c34a940d79e777fc7c3b3ebae50e"
Commit
64849b43
authored
Feb 18, 2025
by
PanZezhong
Browse files
issue/53: 形状统一为size_t,步长统一为ptrdiff_t
parent
3c31dc6c
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
59 additions
and
59 deletions
+59
-59
include/infiniop/ops/attention.h
include/infiniop/ops/attention.h
+1
-1
include/infiniop/ops/avg_pool.h
include/infiniop/ops/avg_pool.h
+4
-4
include/infiniop/ops/max_pool.h
include/infiniop/ops/max_pool.h
+4
-4
include/infiniop/tensor_descriptor.h
include/infiniop/tensor_descriptor.h
+2
-2
src/infiniop/devices/cpu/common_cpu.cc
src/infiniop/devices/cpu/common_cpu.cc
+4
-4
src/infiniop/devices/cpu/common_cpu.h
src/infiniop/devices/cpu/common_cpu.h
+2
-2
src/infiniop/devices/cuda/common_cuda.cuh
src/infiniop/devices/cuda/common_cuda.cuh
+3
-3
src/infiniop/ops/causal_softmax/operator.cc
src/infiniop/ops/causal_softmax/operator.cc
+2
-2
src/infiniop/ops/matmul/blas.h
src/infiniop/ops/matmul/blas.h
+4
-4
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
+2
-2
src/infiniop/ops/matmul/cpu/matmul_cpu_api.h
src/infiniop/ops/matmul/cpu/matmul_cpu_api.h
+2
-2
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
+1
-1
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
+2
-2
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
+1
-1
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+2
-2
src/infiniop/ops/rms_norm/operator.cc
src/infiniop/ops/rms_norm/operator.cc
+2
-2
src/infiniop/ops/rotary_embedding/operator.cc
src/infiniop/ops/rotary_embedding/operator.cc
+2
-2
src/infiniop/ops/utils.h
src/infiniop/ops/utils.h
+14
-14
src/infiniop/tensor_descriptor.cc
src/infiniop/tensor_descriptor.cc
+5
-5
No files found.
include/infiniop/ops/attention.h
View file @
64849b43
...
@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t
...
@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t
infiniopTensorDescriptor_t
v_desc
,
infiniopTensorDescriptor_t
v_desc
,
infiniopTensorDescriptor_t
k_cache_desc
,
infiniopTensorDescriptor_t
k_cache_desc
,
infiniopTensorDescriptor_t
v_cache_desc
,
infiniopTensorDescriptor_t
v_cache_desc
,
uint64
_t
pos
);
size
_t
pos
);
__C
__export
infiniopStatus_t
infiniopGetAttentionWorkspaceSize
(
infiniopAttentionDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniopStatus_t
infiniopGetAttentionWorkspaceSize
(
infiniopAttentionDescriptor_t
desc
,
size_t
*
size
);
...
...
include/infiniop/ops/avg_pool.h
View file @
64849b43
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t h
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t h
infiniopAvgPoolDescriptor_t
*
desc_ptr
,
infiniopAvgPoolDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
x
,
infiniopTensorDescriptor_t
x
,
uint64
_t
const
*
kernel_shape
,
size
_t
const
*
kernel_shape
,
uint64
_t
const
*
pads
,
size
_t
const
*
pads
,
int64
_t
const
*
strides
,
ptrdiff
_t
const
*
strides
,
uint64
_t
n
);
size
_t
n
);
__C
__export
infiniopStatus_t
infiniopGetAvgPoolWorkspaceSize
(
infiniopAvgPoolDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniopStatus_t
infiniopGetAvgPoolWorkspaceSize
(
infiniopAvgPoolDescriptor_t
desc
,
size_t
*
size
);
...
...
include/infiniop/ops/max_pool.h
View file @
64849b43
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t h
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t h
infiniopMaxPoolDescriptor_t
*
desc_ptr
,
infiniopMaxPoolDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
x
,
infiniopTensorDescriptor_t
x
,
uint64
_t
const
*
kernel_shape
,
size
_t
const
*
kernel_shape
,
uint64
_t
const
*
pads
,
size
_t
const
*
pads
,
int64
_t
const
*
strides
,
ptrdiff
_t
const
*
strides
,
uint64
_t
n
);
size
_t
n
);
__C
__export
infiniopStatus_t
infiniopGetMaxPoolWorkspaceSize
(
infiniopMaxPoolDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniopStatus_t
infiniopGetMaxPoolWorkspaceSize
(
infiniopMaxPoolDescriptor_t
desc
,
size_t
*
size
);
...
...
include/infiniop/tensor_descriptor.h
View file @
64849b43
...
@@ -12,12 +12,12 @@ struct InfiniopTensorDescriptor {
...
@@ -12,12 +12,12 @@ struct InfiniopTensorDescriptor {
// Shape of the tensor, ndim elements
// Shape of the tensor, ndim elements
size_t
*
shape
;
size_t
*
shape
;
// Stride of each dimension in elements, ndim elements
// Stride of each dimension in elements, ndim elements
int64
_t
*
strides
;
ptrdiff
_t
*
strides
;
};
};
typedef
struct
InfiniopTensorDescriptor
*
infiniopTensorDescriptor_t
;
typedef
struct
InfiniopTensorDescriptor
*
infiniopTensorDescriptor_t
;
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape
,
int64
_t
const
*
strides
,
infiniDtype_t
dtype
);
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape
,
ptrdiff
_t
const
*
strides
,
infiniDtype_t
dtype
);
__C
__export
infiniopStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
);
__C
__export
infiniopStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
);
...
...
src/infiniop/devices/cpu/common_cpu.cc
View file @
64849b43
...
@@ -60,8 +60,8 @@ uint16_t f32_to_f16(float val) {
...
@@ -60,8 +60,8 @@ uint16_t f32_to_f16(float val) {
}
}
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64
_t
const
*
broadcasted_strides
,
ptrdiff
_t
const
*
broadcasted_strides
,
int64
_t
const
*
target_strides
)
{
ptrdiff
_t
const
*
target_strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
...
@@ -71,7 +71,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
...
@@ -71,7 +71,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
}
}
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
int64
_t
const
*
strides
)
{
ptrdiff
_t
const
*
strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
ndim
;
i
--
>=
0
;)
{
for
(
size_t
i
=
ndim
;
i
--
>=
0
;)
{
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
...
@@ -81,7 +81,7 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
...
@@ -81,7 +81,7 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
}
}
size_t
getPaddedSize
(
size_t
ndim
,
size_t
*
shape
,
size_t
const
*
pads
)
{
size_t
getPaddedSize
(
size_t
ndim
,
size_t
*
shape
,
size_t
const
*
pads
)
{
uint64
_t
total_size
=
1
;
size
_t
total_size
=
1
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
total_size
*=
shape
[
i
]
+
(
i
<
2
?
0
:
2
*
pads
[
i
-
2
]);
total_size
*=
shape
[
i
]
+
(
i
<
2
?
0
:
2
*
pads
[
i
-
2
]);
}
}
...
...
src/infiniop/devices/cpu/common_cpu.h
View file @
64849b43
...
@@ -13,10 +13,10 @@ float f16_to_f32(uint16_t code);
...
@@ -13,10 +13,10 @@ float f16_to_f32(uint16_t code);
uint16_t
f32_to_f16
(
float
val
);
uint16_t
f32_to_f16
(
float
val
);
// return the memory offset of original tensor, given the flattened index of broadcasted tensor
// return the memory offset of original tensor, given the flattened index of broadcasted tensor
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64
_t
const
*
broadcasted_strides
,
int64
_t
const
*
target_strides
);
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
ptrdiff
_t
const
*
broadcasted_strides
,
ptrdiff
_t
const
*
target_strides
);
// return the memory offset a tensor given flattened index
// return the memory offset a tensor given flattened index
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
int64
_t
const
*
strides
);
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
ptrdiff
_t
const
*
strides
);
/**
/**
* get the total array size (element count) after applying padding for a
* get the total array size (element count) after applying padding for a
...
...
src/infiniop/devices/cuda/common_cuda.cuh
View file @
64849b43
...
@@ -96,8 +96,8 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
...
@@ -96,8 +96,8 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
// return the memory offset of original tensor, given the flattened index of
// return the memory offset of original tensor, given the flattened index of
// broadcasted tensor
// broadcasted tensor
inline
__device__
__host__
size_t
indexToReducedOffset
(
inline
__device__
__host__
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64
_t
const
*
broadcasted_strides
,
size_t
flat_index
,
size_t
ndim
,
ptrdiff
_t
const
*
broadcasted_strides
,
int64
_t
const
*
target_strides
)
{
ptrdiff
_t
const
*
target_strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
...
@@ -109,7 +109,7 @@ inline __device__ __host__ size_t indexToReducedOffset(
...
@@ -109,7 +109,7 @@ inline __device__ __host__ size_t indexToReducedOffset(
// get the memory offset of the given element in a tensor given its flat index
// get the memory offset of the given element in a tensor given its flat index
inline
__device__
__host__
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
inline
__device__
__host__
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
size_t
const
*
shape
,
int64
_t
const
*
strides
)
{
ptrdiff
_t
const
*
strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
ndim
;
i
--
>
0
;)
{
for
(
size_t
i
=
ndim
;
i
--
>
0
;)
{
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
...
...
src/infiniop/ops/causal_softmax/operator.cc
View file @
64849b43
...
@@ -40,7 +40,7 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
...
@@ -40,7 +40,7 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopGetCausalSoftmaxWorkspaceSize
(
infiniopCausalSoftmaxDescriptor_t
desc
,
uint64
_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetCausalSoftmaxWorkspaceSize
(
infiniopCausalSoftmaxDescriptor_t
desc
,
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -78,7 +78,7 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
...
@@ -78,7 +78,7 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopCausalSoftmax
(
infiniopCausalSoftmaxDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
__C
infiniopStatus_t
infiniopCausalSoftmax
(
infiniopCausalSoftmaxDescriptor_t
desc
,
void
*
workspace
,
size
_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
...
src/infiniop/ops/matmul/blas.h
View file @
64849b43
...
@@ -9,11 +9,11 @@
...
@@ -9,11 +9,11 @@
typedef
struct
BlasMatrix
{
typedef
struct
BlasMatrix
{
size_t
ndim
;
size_t
ndim
;
size_t
batch
;
size_t
batch
;
int64
_t
stride
;
ptrdiff
_t
stride
;
size_t
rows
;
size_t
rows
;
size_t
cols
;
size_t
cols
;
int64
_t
row_stride
;
ptrdiff
_t
row_stride
;
int64
_t
col_stride
;
ptrdiff
_t
col_stride
;
BlasMatrix
()
{}
BlasMatrix
()
{}
...
@@ -56,7 +56,7 @@ typedef struct BlasMatrix {
...
@@ -56,7 +56,7 @@ typedef struct BlasMatrix {
std
::
swap
(
row_stride
,
col_stride
);
std
::
swap
(
row_stride
,
col_stride
);
}
}
int64
_t
ld
()
const
{
ptrdiff
_t
ld
()
const
{
if
(
this
->
row_stride
==
1
)
{
if
(
this
->
row_stride
==
1
)
{
return
this
->
col_stride
;
return
this
->
col_stride
;
}
else
{
}
else
{
...
...
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
View file @
64849b43
...
@@ -25,7 +25,7 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
...
@@ -25,7 +25,7 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
}
}
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
uint64
_t
*
size
)
{
size
_t
*
size
)
{
*
size
=
0
;
*
size
=
0
;
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
}
}
...
@@ -76,7 +76,7 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
...
@@ -76,7 +76,7 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
}
}
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
size
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
)
{
void
const
*
b
,
float
alpha
,
float
beta
)
{
if
(
desc
->
dtype
==
INFINI_DTYPE_F16
)
{
if
(
desc
->
dtype
==
INFINI_DTYPE_F16
)
{
return
cpuCalculateMatmul
<
uint16_t
>
(
desc
,
c
,
beta
,
a
,
b
,
alpha
);
return
cpuCalculateMatmul
<
uint16_t
>
(
desc
,
c
,
beta
,
a
,
b
,
alpha
);
...
...
src/infiniop/ops/matmul/cpu/matmul_cpu_api.h
View file @
64849b43
...
@@ -14,10 +14,10 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
...
@@ -14,10 +14,10 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
infiniopTensorDescriptor_t
b_desc
);
infiniopTensorDescriptor_t
b_desc
);
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
uint64
_t
*
size
);
size
_t
*
size
);
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
size
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
);
void
const
*
b
,
float
alpha
,
float
beta
);
infiniopStatus_t
cpuDestroyMatmulDescriptor
(
infiniopMatmulCpuDescriptor_t
desc
);
infiniopStatus_t
cpuDestroyMatmulDescriptor
(
infiniopMatmulCpuDescriptor_t
desc
);
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
View file @
64849b43
...
@@ -27,7 +27,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
...
@@ -27,7 +27,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
}
}
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
uint64
_t
*
size
)
{
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
size
_t
*
size
)
{
*
size
=
0
;
*
size
=
0
;
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
}
}
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
View file @
64849b43
...
@@ -13,11 +13,11 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
...
@@ -13,11 +13,11 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
);
infiniopTensorDescriptor_t
b_desc
);
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
uint64
_t
*
size
);
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
size
_t
*
size
);
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
void
*
workspace
,
void
*
workspace
,
uint64
_t
workspace_size
,
size
_t
workspace_size
,
void
*
c
,
void
*
c
,
void
const
*
a
,
void
const
*
a
,
void
const
*
b
,
void
const
*
b
,
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
View file @
64849b43
...
@@ -56,7 +56,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
...
@@ -56,7 +56,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
void
*
workspace
,
void
*
workspace
,
uint64
_t
workspace_size
,
size
_t
workspace_size
,
void
*
c
,
void
*
c
,
void
const
*
a
,
void
const
*
a
,
void
const
*
b
,
void
const
*
b
,
...
...
src/infiniop/ops/random_sample/operator.cc
View file @
64849b43
...
@@ -38,7 +38,7 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
...
@@ -38,7 +38,7 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
};
};
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
uint64
_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -77,7 +77,7 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe
...
@@ -77,7 +77,7 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe
__C
infiniopStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
__C
infiniopStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
void
*
workspace
,
void
*
workspace
,
uint64
_t
workspace_size
,
size
_t
workspace_size
,
void
*
result
,
void
*
result
,
void
const
*
probs
,
void
const
*
probs
,
float
random_val
,
float
random_val
,
...
...
src/infiniop/ops/rms_norm/operator.cc
View file @
64849b43
...
@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
...
@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
uint64
_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -83,7 +83,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
...
@@ -83,7 +83,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
__C
infiniopStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
size
_t
workspace_size
,
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
...
...
src/infiniop/ops/rotary_embedding/operator.cc
View file @
64849b43
...
@@ -53,7 +53,7 @@ __C infiniopStatus_t infiniopCreateRoPEDescriptor(
...
@@ -53,7 +53,7 @@ __C infiniopStatus_t infiniopCreateRoPEDescriptor(
}
}
__C
infiniopStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
__C
infiniopStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
uint64
_t
*
size
)
{
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -90,7 +90,7 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
...
@@ -90,7 +90,7 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
}
}
__C
infiniopStatus_t
infiniopRoPE
(
infiniopRoPEDescriptor_t
desc
,
__C
infiniopStatus_t
infiniopRoPE
(
infiniopRoPEDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
workspace
,
size
_t
workspace_size
,
void
*
t
,
void
const
*
pos_ids
,
void
*
t
,
void
const
*
pos_ids
,
void
const
*
sin_table
,
void
const
*
cos_table
,
void
const
*
sin_table
,
void
const
*
cos_table
,
void
*
stream
)
{
void
*
stream
)
{
...
...
src/infiniop/ops/utils.h
View file @
64849b43
...
@@ -37,9 +37,9 @@
...
@@ -37,9 +37,9 @@
} \
} \
} while (0)
} while (0)
inline
std
::
vector
<
int64
_t
>
getByteStrides
(
infiniopTensorDescriptor_t
desc
)
{
inline
std
::
vector
<
ptrdiff
_t
>
getByteStrides
(
infiniopTensorDescriptor_t
desc
)
{
std
::
vector
<
int64
_t
>
strides
(
desc
->
ndim
);
std
::
vector
<
ptrdiff
_t
>
strides
(
desc
->
ndim
);
for
(
uint64
_t
i
=
0
;
i
<
desc
->
ndim
;
i
++
)
{
for
(
size
_t
i
=
0
;
i
<
desc
->
ndim
;
i
++
)
{
strides
[
i
]
=
desc
->
strides
[
i
]
*
infiniSizeof
(
desc
->
dtype
);
strides
[
i
]
=
desc
->
strides
[
i
]
*
infiniSizeof
(
desc
->
dtype
);
}
}
return
strides
;
return
strides
;
...
@@ -54,11 +54,11 @@ inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
...
@@ -54,11 +54,11 @@ inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
}
}
// calculate the broadcasted shape for two tensors
// calculate the broadcasted shape for two tensors
inline
bool
getBroadcastShape
(
const
uint64
_t
*
shape1
,
uint64
_t
ndim1
,
inline
bool
getBroadcastShape
(
const
size
_t
*
shape1
,
size
_t
ndim1
,
const
uint64
_t
*
shape2
,
uint64
_t
ndim2
,
const
size
_t
*
shape2
,
size
_t
ndim2
,
uint64
_t
*
broadcast_shape
,
size
_t
*
broadcast_shape
,
uint64
_t
*
padded_shape1
,
uint64
_t
*
padded_shape2
,
size
_t
*
padded_shape1
,
size
_t
*
padded_shape2
,
uint64
_t
max_rank
)
{
size
_t
max_rank
)
{
// prepending and initializing
// prepending and initializing
std
::
fill
(
padded_shape1
,
padded_shape1
+
max_rank
,
1
);
std
::
fill
(
padded_shape1
,
padded_shape1
+
max_rank
,
1
);
std
::
fill
(
padded_shape2
,
padded_shape2
+
max_rank
,
1
);
std
::
fill
(
padded_shape2
,
padded_shape2
+
max_rank
,
1
);
...
@@ -82,8 +82,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
...
@@ -82,8 +82,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
a
,
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
a
,
infiniopTensorDescriptor_t
b
,
infiniopTensorDescriptor_t
b
,
infiniopTensorDescriptor_t
c
,
infiniopTensorDescriptor_t
c
,
uint64
_t
broadcast_ndim
)
{
size
_t
broadcast_ndim
)
{
std
::
vector
<
uint64
_t
>
broadcast_shape_
(
broadcast_ndim
),
std
::
vector
<
size
_t
>
broadcast_shape_
(
broadcast_ndim
),
padded_shape1_
(
broadcast_ndim
),
padded_shape2_
(
broadcast_ndim
);
padded_shape1_
(
broadcast_ndim
),
padded_shape2_
(
broadcast_ndim
);
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
...
@@ -130,7 +130,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
...
@@ -130,7 +130,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
return
nullptr
;
return
nullptr
;
}
}
size_t
*
shape
=
new
size_t
[
ndim
];
size_t
*
shape
=
new
size_t
[
ndim
];
int64
_t
*
strides
=
new
int64
_t
[
ndim
];
ptrdiff
_t
*
strides
=
new
ptrdiff
_t
[
ndim
];
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
if
(
std
::
find
(
order
.
begin
(),
order
.
end
(),
i
)
==
order
.
end
())
{
if
(
std
::
find
(
order
.
begin
(),
order
.
end
(),
i
)
==
order
.
end
())
{
return
nullptr
;
return
nullptr
;
...
@@ -146,7 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
...
@@ -146,7 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
inline
bool
isContiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
inline
bool
isContiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
size_t
dim_start
,
size_t
dim_end
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
int64
_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
ptrdiff
_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
return
false
;
return
false
;
}
}
}
}
...
@@ -170,7 +170,7 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
...
@@ -170,7 +170,7 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
size_t
new_ndim
=
ndim
-
(
dim_end
-
dim_start
);
size_t
new_ndim
=
ndim
-
(
dim_end
-
dim_start
);
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
int64
_t
*
new_strides
=
new
int64
_t
[
new_ndim
];
ptrdiff
_t
*
new_strides
=
new
ptrdiff
_t
[
new_ndim
];
size_t
index
=
0
;
size_t
index
=
0
;
for
(
size_t
i
=
0
;
i
<
dim_start
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
dim_start
;
i
++
)
{
new_shape
[
index
]
=
desc
->
shape
[
i
];
new_shape
[
index
]
=
desc
->
shape
[
i
];
...
@@ -205,7 +205,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
...
@@ -205,7 +205,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
}
}
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
int64
_t
*
new_strides
=
new
int64
_t
[
new_ndim
];
ptrdiff
_t
*
new_strides
=
new
ptrdiff
_t
[
new_ndim
];
size_t
index
=
0
;
size_t
index
=
0
;
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
new_shape
[
index
]
=
desc
->
shape
[
i
];
new_shape
[
index
]
=
desc
->
shape
[
i
];
...
...
src/infiniop/tensor_descriptor.cc
View file @
64849b43
#include "infiniop/tensor_descriptor.h"
#include "infiniop/tensor_descriptor.h"
#include <cstring>
#include <cstring>
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape_
,
int64
_t
const
*
strides_
,
infiniDtype_t
datatype
)
{
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape_
,
ptrdiff
_t
const
*
strides_
,
infiniDtype_t
datatype
)
{
size_t
*
shape
=
new
size_t
[
ndim
];
size_t
*
shape
=
new
size_t
[
ndim
];
int64
_t
*
strides
=
new
int64
_t
[
ndim
];
ptrdiff
_t
*
strides
=
new
ptrdiff
_t
[
ndim
];
std
::
memcpy
(
shape
,
shape_
,
ndim
*
sizeof
(
size_t
));
std
::
memcpy
(
shape
,
shape_
,
ndim
*
sizeof
(
size_t
));
if
(
strides_
)
{
if
(
strides_
)
{
std
::
memcpy
(
strides
,
strides_
,
ndim
*
sizeof
(
int64
_t
));
std
::
memcpy
(
strides
,
strides_
,
ndim
*
sizeof
(
ptrdiff
_t
));
}
else
{
}
else
{
int64
_t
dsize
=
1
;
ptrdiff
_t
dsize
=
1
;
for
(
in
t
i
=
ndim
-
1
;
i
>=
0
;
i
--
)
{
for
(
size_
t
i
=
ndim
-
1
;
i
>=
0
;
i
--
)
{
strides
[
i
]
=
dsize
;
strides
[
i
]
=
dsize
;
dsize
*=
shape
[
i
];
dsize
*=
shape
[
i
];
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment