Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
f385af7a
You need to sign in or sign up before continuing.
Unverified
Commit
f385af7a
authored
Feb 18, 2025
by
PanZezhong1725
Committed by
GitHub
Feb 18, 2025
Browse files
Merge pull request #54 from PanZezhong1725/issue/53
issue/53: 形状统一为size_t,步长统一为ptrdiff_t
parents
3c31dc6c
64849b43
Changes
19
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
59 additions
and
59 deletions
+59
-59
include/infiniop/ops/attention.h
include/infiniop/ops/attention.h
+1
-1
include/infiniop/ops/avg_pool.h
include/infiniop/ops/avg_pool.h
+4
-4
include/infiniop/ops/max_pool.h
include/infiniop/ops/max_pool.h
+4
-4
include/infiniop/tensor_descriptor.h
include/infiniop/tensor_descriptor.h
+2
-2
src/infiniop/devices/cpu/common_cpu.cc
src/infiniop/devices/cpu/common_cpu.cc
+4
-4
src/infiniop/devices/cpu/common_cpu.h
src/infiniop/devices/cpu/common_cpu.h
+2
-2
src/infiniop/devices/cuda/common_cuda.cuh
src/infiniop/devices/cuda/common_cuda.cuh
+3
-3
src/infiniop/ops/causal_softmax/operator.cc
src/infiniop/ops/causal_softmax/operator.cc
+2
-2
src/infiniop/ops/matmul/blas.h
src/infiniop/ops/matmul/blas.h
+4
-4
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
+2
-2
src/infiniop/ops/matmul/cpu/matmul_cpu_api.h
src/infiniop/ops/matmul/cpu/matmul_cpu_api.h
+2
-2
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
+1
-1
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
+2
-2
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
+1
-1
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+2
-2
src/infiniop/ops/rms_norm/operator.cc
src/infiniop/ops/rms_norm/operator.cc
+2
-2
src/infiniop/ops/rotary_embedding/operator.cc
src/infiniop/ops/rotary_embedding/operator.cc
+2
-2
src/infiniop/ops/utils.h
src/infiniop/ops/utils.h
+14
-14
src/infiniop/tensor_descriptor.cc
src/infiniop/tensor_descriptor.cc
+5
-5
No files found.
include/infiniop/ops/attention.h
View file @
f385af7a
...
@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t
...
@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t
infiniopTensorDescriptor_t
v_desc
,
infiniopTensorDescriptor_t
v_desc
,
infiniopTensorDescriptor_t
k_cache_desc
,
infiniopTensorDescriptor_t
k_cache_desc
,
infiniopTensorDescriptor_t
v_cache_desc
,
infiniopTensorDescriptor_t
v_cache_desc
,
uint64
_t
pos
);
size
_t
pos
);
__C
__export
infiniopStatus_t
infiniopGetAttentionWorkspaceSize
(
infiniopAttentionDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniopStatus_t
infiniopGetAttentionWorkspaceSize
(
infiniopAttentionDescriptor_t
desc
,
size_t
*
size
);
...
...
include/infiniop/ops/avg_pool.h
View file @
f385af7a
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t h
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t h
infiniopAvgPoolDescriptor_t
*
desc_ptr
,
infiniopAvgPoolDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
x
,
infiniopTensorDescriptor_t
x
,
uint64
_t
const
*
kernel_shape
,
size
_t
const
*
kernel_shape
,
uint64
_t
const
*
pads
,
size
_t
const
*
pads
,
int64
_t
const
*
strides
,
ptrdiff
_t
const
*
strides
,
uint64
_t
n
);
size
_t
n
);
__C
__export
infiniopStatus_t
infiniopGetAvgPoolWorkspaceSize
(
infiniopAvgPoolDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniopStatus_t
infiniopGetAvgPoolWorkspaceSize
(
infiniopAvgPoolDescriptor_t
desc
,
size_t
*
size
);
...
...
include/infiniop/ops/max_pool.h
View file @
f385af7a
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t h
...
@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t h
infiniopMaxPoolDescriptor_t
*
desc_ptr
,
infiniopMaxPoolDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
y
,
infiniopTensorDescriptor_t
x
,
infiniopTensorDescriptor_t
x
,
uint64
_t
const
*
kernel_shape
,
size
_t
const
*
kernel_shape
,
uint64
_t
const
*
pads
,
size
_t
const
*
pads
,
int64
_t
const
*
strides
,
ptrdiff
_t
const
*
strides
,
uint64
_t
n
);
size
_t
n
);
__C
__export
infiniopStatus_t
infiniopGetMaxPoolWorkspaceSize
(
infiniopMaxPoolDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniopStatus_t
infiniopGetMaxPoolWorkspaceSize
(
infiniopMaxPoolDescriptor_t
desc
,
size_t
*
size
);
...
...
include/infiniop/tensor_descriptor.h
View file @
f385af7a
...
@@ -12,12 +12,12 @@ struct InfiniopTensorDescriptor {
...
@@ -12,12 +12,12 @@ struct InfiniopTensorDescriptor {
// Shape of the tensor, ndim elements
// Shape of the tensor, ndim elements
size_t
*
shape
;
size_t
*
shape
;
// Stride of each dimension in elements, ndim elements
// Stride of each dimension in elements, ndim elements
int64
_t
*
strides
;
ptrdiff
_t
*
strides
;
};
};
typedef
struct
InfiniopTensorDescriptor
*
infiniopTensorDescriptor_t
;
typedef
struct
InfiniopTensorDescriptor
*
infiniopTensorDescriptor_t
;
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape
,
int64
_t
const
*
strides
,
infiniDtype_t
dtype
);
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape
,
ptrdiff
_t
const
*
strides
,
infiniDtype_t
dtype
);
__C
__export
infiniopStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
);
__C
__export
infiniopStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
);
...
...
src/infiniop/devices/cpu/common_cpu.cc
View file @
f385af7a
...
@@ -60,8 +60,8 @@ uint16_t f32_to_f16(float val) {
...
@@ -60,8 +60,8 @@ uint16_t f32_to_f16(float val) {
}
}
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64
_t
const
*
broadcasted_strides
,
ptrdiff
_t
const
*
broadcasted_strides
,
int64
_t
const
*
target_strides
)
{
ptrdiff
_t
const
*
target_strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
...
@@ -71,7 +71,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
...
@@ -71,7 +71,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
}
}
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
int64
_t
const
*
strides
)
{
ptrdiff
_t
const
*
strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
ndim
;
i
--
>=
0
;)
{
for
(
size_t
i
=
ndim
;
i
--
>=
0
;)
{
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
...
@@ -81,7 +81,7 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
...
@@ -81,7 +81,7 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
}
}
size_t
getPaddedSize
(
size_t
ndim
,
size_t
*
shape
,
size_t
const
*
pads
)
{
size_t
getPaddedSize
(
size_t
ndim
,
size_t
*
shape
,
size_t
const
*
pads
)
{
uint64
_t
total_size
=
1
;
size
_t
total_size
=
1
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
total_size
*=
shape
[
i
]
+
(
i
<
2
?
0
:
2
*
pads
[
i
-
2
]);
total_size
*=
shape
[
i
]
+
(
i
<
2
?
0
:
2
*
pads
[
i
-
2
]);
}
}
...
...
src/infiniop/devices/cpu/common_cpu.h
View file @
f385af7a
...
@@ -13,10 +13,10 @@ float f16_to_f32(uint16_t code);
...
@@ -13,10 +13,10 @@ float f16_to_f32(uint16_t code);
uint16_t
f32_to_f16
(
float
val
);
uint16_t
f32_to_f16
(
float
val
);
// return the memory offset of original tensor, given the flattened index of broadcasted tensor
// return the memory offset of original tensor, given the flattened index of broadcasted tensor
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64
_t
const
*
broadcasted_strides
,
int64
_t
const
*
target_strides
);
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
ptrdiff
_t
const
*
broadcasted_strides
,
ptrdiff
_t
const
*
target_strides
);
// return the memory offset a tensor given flattened index
// return the memory offset a tensor given flattened index
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
int64
_t
const
*
strides
);
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
ptrdiff
_t
const
*
strides
);
/**
/**
* get the total array size (element count) after applying padding for a
* get the total array size (element count) after applying padding for a
...
...
src/infiniop/devices/cuda/common_cuda.cuh
View file @
f385af7a
...
@@ -96,8 +96,8 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
...
@@ -96,8 +96,8 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
// return the memory offset of original tensor, given the flattened index of
// return the memory offset of original tensor, given the flattened index of
// broadcasted tensor
// broadcasted tensor
inline
__device__
__host__
size_t
indexToReducedOffset
(
inline
__device__
__host__
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64
_t
const
*
broadcasted_strides
,
size_t
flat_index
,
size_t
ndim
,
ptrdiff
_t
const
*
broadcasted_strides
,
int64
_t
const
*
target_strides
)
{
ptrdiff
_t
const
*
target_strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
{
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
res
+=
flat_index
/
broadcasted_strides
[
i
]
*
target_strides
[
i
];
...
@@ -109,7 +109,7 @@ inline __device__ __host__ size_t indexToReducedOffset(
...
@@ -109,7 +109,7 @@ inline __device__ __host__ size_t indexToReducedOffset(
// get the memory offset of the given element in a tensor given its flat index
// get the memory offset of the given element in a tensor given its flat index
inline
__device__
__host__
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
inline
__device__
__host__
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
size_t
const
*
shape
,
int64
_t
const
*
strides
)
{
ptrdiff
_t
const
*
strides
)
{
size_t
res
=
0
;
size_t
res
=
0
;
for
(
size_t
i
=
ndim
;
i
--
>
0
;)
{
for
(
size_t
i
=
ndim
;
i
--
>
0
;)
{
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
res
+=
(
flat_index
%
shape
[
i
])
*
strides
[
i
];
...
...
src/infiniop/ops/causal_softmax/operator.cc
View file @
f385af7a
...
@@ -40,7 +40,7 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
...
@@ -40,7 +40,7 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopGetCausalSoftmaxWorkspaceSize
(
infiniopCausalSoftmaxDescriptor_t
desc
,
uint64
_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetCausalSoftmaxWorkspaceSize
(
infiniopCausalSoftmaxDescriptor_t
desc
,
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -78,7 +78,7 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
...
@@ -78,7 +78,7 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopCausalSoftmax
(
infiniopCausalSoftmaxDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
__C
infiniopStatus_t
infiniopCausalSoftmax
(
infiniopCausalSoftmaxDescriptor_t
desc
,
void
*
workspace
,
size
_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
...
src/infiniop/ops/matmul/blas.h
View file @
f385af7a
...
@@ -9,11 +9,11 @@
...
@@ -9,11 +9,11 @@
typedef
struct
BlasMatrix
{
typedef
struct
BlasMatrix
{
size_t
ndim
;
size_t
ndim
;
size_t
batch
;
size_t
batch
;
int64
_t
stride
;
ptrdiff
_t
stride
;
size_t
rows
;
size_t
rows
;
size_t
cols
;
size_t
cols
;
int64
_t
row_stride
;
ptrdiff
_t
row_stride
;
int64
_t
col_stride
;
ptrdiff
_t
col_stride
;
BlasMatrix
()
{}
BlasMatrix
()
{}
...
@@ -56,7 +56,7 @@ typedef struct BlasMatrix {
...
@@ -56,7 +56,7 @@ typedef struct BlasMatrix {
std
::
swap
(
row_stride
,
col_stride
);
std
::
swap
(
row_stride
,
col_stride
);
}
}
int64
_t
ld
()
const
{
ptrdiff
_t
ld
()
const
{
if
(
this
->
row_stride
==
1
)
{
if
(
this
->
row_stride
==
1
)
{
return
this
->
col_stride
;
return
this
->
col_stride
;
}
else
{
}
else
{
...
...
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
View file @
f385af7a
...
@@ -25,7 +25,7 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
...
@@ -25,7 +25,7 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
}
}
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
uint64
_t
*
size
)
{
size
_t
*
size
)
{
*
size
=
0
;
*
size
=
0
;
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
}
}
...
@@ -76,7 +76,7 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
...
@@ -76,7 +76,7 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
}
}
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
size
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
)
{
void
const
*
b
,
float
alpha
,
float
beta
)
{
if
(
desc
->
dtype
==
INFINI_DTYPE_F16
)
{
if
(
desc
->
dtype
==
INFINI_DTYPE_F16
)
{
return
cpuCalculateMatmul
<
uint16_t
>
(
desc
,
c
,
beta
,
a
,
b
,
alpha
);
return
cpuCalculateMatmul
<
uint16_t
>
(
desc
,
c
,
beta
,
a
,
b
,
alpha
);
...
...
src/infiniop/ops/matmul/cpu/matmul_cpu_api.h
View file @
f385af7a
...
@@ -14,10 +14,10 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
...
@@ -14,10 +14,10 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
infiniopTensorDescriptor_t
b_desc
);
infiniopTensorDescriptor_t
b_desc
);
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
infiniopStatus_t
cpuGetMatmulWorkspaceSize
(
infiniopMatmulCpuDescriptor_t
desc
,
uint64
_t
*
size
);
size
_t
*
size
);
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
infiniopStatus_t
cpuMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
size
_t
workspace_size
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
float
beta
);
void
const
*
b
,
float
alpha
,
float
beta
);
infiniopStatus_t
cpuDestroyMatmulDescriptor
(
infiniopMatmulCpuDescriptor_t
desc
);
infiniopStatus_t
cpuDestroyMatmulDescriptor
(
infiniopMatmulCpuDescriptor_t
desc
);
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
View file @
f385af7a
...
@@ -27,7 +27,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
...
@@ -27,7 +27,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
}
}
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
uint64
_t
*
size
)
{
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
size
_t
*
size
)
{
*
size
=
0
;
*
size
=
0
;
return
INFINIOP_STATUS_SUCCESS
;
return
INFINIOP_STATUS_SUCCESS
;
}
}
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
View file @
f385af7a
...
@@ -13,11 +13,11 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
...
@@ -13,11 +13,11 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
);
infiniopTensorDescriptor_t
b_desc
);
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
uint64
_t
*
size
);
infiniopStatus_t
cudaGetMatmulWorkspaceSize
(
infiniopMatmulCudaDescriptor_t
desc
,
size
_t
*
size
);
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
void
*
workspace
,
void
*
workspace
,
uint64
_t
workspace_size
,
size
_t
workspace_size
,
void
*
c
,
void
*
c
,
void
const
*
a
,
void
const
*
a
,
void
const
*
b
,
void
const
*
b
,
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
View file @
f385af7a
...
@@ -56,7 +56,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
...
@@ -56,7 +56,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
infiniopStatus_t
cudaMatmul
(
infiniopMatmulCudaDescriptor_t
desc
,
void
*
workspace
,
void
*
workspace
,
uint64
_t
workspace_size
,
size
_t
workspace_size
,
void
*
c
,
void
*
c
,
void
const
*
a
,
void
const
*
a
,
void
const
*
b
,
void
const
*
b
,
...
...
src/infiniop/ops/random_sample/operator.cc
View file @
f385af7a
...
@@ -38,7 +38,7 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
...
@@ -38,7 +38,7 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
};
};
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
uint64
_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -77,7 +77,7 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe
...
@@ -77,7 +77,7 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe
__C
infiniopStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
__C
infiniopStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
void
*
workspace
,
void
*
workspace
,
uint64
_t
workspace_size
,
size
_t
workspace_size
,
void
*
result
,
void
*
result
,
void
const
*
probs
,
void
const
*
probs
,
float
random_val
,
float
random_val
,
...
...
src/infiniop/ops/rms_norm/operator.cc
View file @
f385af7a
...
@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
...
@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
uint64
_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -83,7 +83,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
...
@@ -83,7 +83,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
}
__C
infiniopStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
__C
infiniopStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
size
_t
workspace_size
,
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
...
...
src/infiniop/ops/rotary_embedding/operator.cc
View file @
f385af7a
...
@@ -53,7 +53,7 @@ __C infiniopStatus_t infiniopCreateRoPEDescriptor(
...
@@ -53,7 +53,7 @@ __C infiniopStatus_t infiniopCreateRoPEDescriptor(
}
}
__C
infiniopStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
__C
infiniopStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
uint64
_t
*
size
)
{
size
_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
...
@@ -90,7 +90,7 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
...
@@ -90,7 +90,7 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
}
}
__C
infiniopStatus_t
infiniopRoPE
(
infiniopRoPEDescriptor_t
desc
,
__C
infiniopStatus_t
infiniopRoPE
(
infiniopRoPEDescriptor_t
desc
,
void
*
workspace
,
uint64
_t
workspace_size
,
void
*
workspace
,
size
_t
workspace_size
,
void
*
t
,
void
const
*
pos_ids
,
void
*
t
,
void
const
*
pos_ids
,
void
const
*
sin_table
,
void
const
*
cos_table
,
void
const
*
sin_table
,
void
const
*
cos_table
,
void
*
stream
)
{
void
*
stream
)
{
...
...
src/infiniop/ops/utils.h
View file @
f385af7a
...
@@ -37,9 +37,9 @@
...
@@ -37,9 +37,9 @@
} \
} \
} while (0)
} while (0)
inline
std
::
vector
<
int64
_t
>
getByteStrides
(
infiniopTensorDescriptor_t
desc
)
{
inline
std
::
vector
<
ptrdiff
_t
>
getByteStrides
(
infiniopTensorDescriptor_t
desc
)
{
std
::
vector
<
int64
_t
>
strides
(
desc
->
ndim
);
std
::
vector
<
ptrdiff
_t
>
strides
(
desc
->
ndim
);
for
(
uint64
_t
i
=
0
;
i
<
desc
->
ndim
;
i
++
)
{
for
(
size
_t
i
=
0
;
i
<
desc
->
ndim
;
i
++
)
{
strides
[
i
]
=
desc
->
strides
[
i
]
*
infiniSizeof
(
desc
->
dtype
);
strides
[
i
]
=
desc
->
strides
[
i
]
*
infiniSizeof
(
desc
->
dtype
);
}
}
return
strides
;
return
strides
;
...
@@ -54,11 +54,11 @@ inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
...
@@ -54,11 +54,11 @@ inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
}
}
// calculate the broadcasted shape for two tensors
// calculate the broadcasted shape for two tensors
inline
bool
getBroadcastShape
(
const
uint64
_t
*
shape1
,
uint64
_t
ndim1
,
inline
bool
getBroadcastShape
(
const
size
_t
*
shape1
,
size
_t
ndim1
,
const
uint64
_t
*
shape2
,
uint64
_t
ndim2
,
const
size
_t
*
shape2
,
size
_t
ndim2
,
uint64
_t
*
broadcast_shape
,
size
_t
*
broadcast_shape
,
uint64
_t
*
padded_shape1
,
uint64
_t
*
padded_shape2
,
size
_t
*
padded_shape1
,
size
_t
*
padded_shape2
,
uint64
_t
max_rank
)
{
size
_t
max_rank
)
{
// prepending and initializing
// prepending and initializing
std
::
fill
(
padded_shape1
,
padded_shape1
+
max_rank
,
1
);
std
::
fill
(
padded_shape1
,
padded_shape1
+
max_rank
,
1
);
std
::
fill
(
padded_shape2
,
padded_shape2
+
max_rank
,
1
);
std
::
fill
(
padded_shape2
,
padded_shape2
+
max_rank
,
1
);
...
@@ -82,8 +82,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
...
@@ -82,8 +82,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
a
,
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
a
,
infiniopTensorDescriptor_t
b
,
infiniopTensorDescriptor_t
b
,
infiniopTensorDescriptor_t
c
,
infiniopTensorDescriptor_t
c
,
uint64
_t
broadcast_ndim
)
{
size
_t
broadcast_ndim
)
{
std
::
vector
<
uint64
_t
>
broadcast_shape_
(
broadcast_ndim
),
std
::
vector
<
size
_t
>
broadcast_shape_
(
broadcast_ndim
),
padded_shape1_
(
broadcast_ndim
),
padded_shape2_
(
broadcast_ndim
);
padded_shape1_
(
broadcast_ndim
),
padded_shape2_
(
broadcast_ndim
);
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
...
@@ -130,7 +130,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
...
@@ -130,7 +130,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
return
nullptr
;
return
nullptr
;
}
}
size_t
*
shape
=
new
size_t
[
ndim
];
size_t
*
shape
=
new
size_t
[
ndim
];
int64
_t
*
strides
=
new
int64
_t
[
ndim
];
ptrdiff
_t
*
strides
=
new
ptrdiff
_t
[
ndim
];
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
if
(
std
::
find
(
order
.
begin
(),
order
.
end
(),
i
)
==
order
.
end
())
{
if
(
std
::
find
(
order
.
begin
(),
order
.
end
(),
i
)
==
order
.
end
())
{
return
nullptr
;
return
nullptr
;
...
@@ -146,7 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
...
@@ -146,7 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
inline
bool
isContiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
inline
bool
isContiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
size_t
dim_start
,
size_t
dim_end
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
int64
_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
ptrdiff
_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
return
false
;
return
false
;
}
}
}
}
...
@@ -170,7 +170,7 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
...
@@ -170,7 +170,7 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
size_t
new_ndim
=
ndim
-
(
dim_end
-
dim_start
);
size_t
new_ndim
=
ndim
-
(
dim_end
-
dim_start
);
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
int64
_t
*
new_strides
=
new
int64
_t
[
new_ndim
];
ptrdiff
_t
*
new_strides
=
new
ptrdiff
_t
[
new_ndim
];
size_t
index
=
0
;
size_t
index
=
0
;
for
(
size_t
i
=
0
;
i
<
dim_start
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
dim_start
;
i
++
)
{
new_shape
[
index
]
=
desc
->
shape
[
i
];
new_shape
[
index
]
=
desc
->
shape
[
i
];
...
@@ -205,7 +205,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
...
@@ -205,7 +205,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
}
}
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
size_t
*
new_shape
=
new
size_t
[
new_ndim
];
int64
_t
*
new_strides
=
new
int64
_t
[
new_ndim
];
ptrdiff
_t
*
new_strides
=
new
ptrdiff
_t
[
new_ndim
];
size_t
index
=
0
;
size_t
index
=
0
;
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
dim
;
i
++
)
{
new_shape
[
index
]
=
desc
->
shape
[
i
];
new_shape
[
index
]
=
desc
->
shape
[
i
];
...
...
src/infiniop/tensor_descriptor.cc
View file @
f385af7a
#include "infiniop/tensor_descriptor.h"
#include "infiniop/tensor_descriptor.h"
#include <cstring>
#include <cstring>
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape_
,
int64
_t
const
*
strides_
,
infiniDtype_t
datatype
)
{
__C
__export
infiniopStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape_
,
ptrdiff
_t
const
*
strides_
,
infiniDtype_t
datatype
)
{
size_t
*
shape
=
new
size_t
[
ndim
];
size_t
*
shape
=
new
size_t
[
ndim
];
int64
_t
*
strides
=
new
int64
_t
[
ndim
];
ptrdiff
_t
*
strides
=
new
ptrdiff
_t
[
ndim
];
std
::
memcpy
(
shape
,
shape_
,
ndim
*
sizeof
(
size_t
));
std
::
memcpy
(
shape
,
shape_
,
ndim
*
sizeof
(
size_t
));
if
(
strides_
)
{
if
(
strides_
)
{
std
::
memcpy
(
strides
,
strides_
,
ndim
*
sizeof
(
int64
_t
));
std
::
memcpy
(
strides
,
strides_
,
ndim
*
sizeof
(
ptrdiff
_t
));
}
else
{
}
else
{
int64
_t
dsize
=
1
;
ptrdiff
_t
dsize
=
1
;
for
(
in
t
i
=
ndim
-
1
;
i
>=
0
;
i
--
)
{
for
(
size_
t
i
=
ndim
-
1
;
i
>=
0
;
i
--
)
{
strides
[
i
]
=
dsize
;
strides
[
i
]
=
dsize
;
dsize
*=
shape
[
i
];
dsize
*=
shape
[
i
];
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment