Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
98270602
Commit
98270602
authored
May 14, 2025
by
zhangyue
Browse files
issue/174: fix rearrange, change getStorageShape
parent
46a2678f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
59 additions
and
37 deletions
+59
-37
src/infiniop/devices/ascend/common_ascend.cc
src/infiniop/devices/ascend/common_ascend.cc
+11
-5
src/infiniop/ops/causal_softmax/ascend/causal_softmax_aclnn.cc
...nfiniop/ops/causal_softmax/ascend/causal_softmax_aclnn.cc
+2
-2
src/infiniop/ops/rearrange/ascend/rearrange_ascend.cc
src/infiniop/ops/rearrange/ascend/rearrange_ascend.cc
+39
-27
test/infiniop/causal_softmax.py
test/infiniop/causal_softmax.py
+4
-1
test/infiniop/libinfiniop/utils.py
test/infiniop/libinfiniop/utils.py
+3
-2
No files found.
src/infiniop/devices/ascend/common_ascend.cc
View file @
98270602
#include "common_ascend.h"
// Infer the shape of the 1-D storage buffer required to back a (possibly
// strided, non-contiguous) tensor view.
//
// The buffer must cover every element reachable through the view, i.e. the
// largest linear offset plus one:
//     max_offset = sum_i (shape[i] - 1) * strides[i]
// so the storage shape is { max_offset + 1 }.
//
// @param shape   per-dimension extents of the view
// @param strides per-dimension strides of the view
// @return 1-D storage shape { max_offset + 1 }
// @throws std::invalid_argument if shape and strides differ in rank
std::vector<int64_t> inferStorageShape(std::vector<int64_t> shape,
                                       std::vector<int64_t> strides) {
    // Validate before touching either container.
    if (shape.size() != strides.size()) {
        throw std::invalid_argument("Shape and strides must have the same length.");
    }
    int64_t max_offset = 0;
    for (size_t i = 0; i < shape.size(); ++i) {
        max_offset += (shape[i] - 1) * strides[i];
    }
    // Storage shape is a 1-D buffer that must cover all accessed elements.
    // (An empty shape yields {1}: a single scalar element.)
    return {max_offset + 1};
}
size_t
aclnnTensorDescriptor
::
numel
()
const
{
...
...
@@ -18,7 +24,7 @@ aclnnTensorDescriptor::aclnnTensorDescriptor(infiniopTensorDescriptor_t desc, vo
this
->
strides
=
std
::
vector
<
int64_t
>
(
ndim
);
for
(
uint64_t
i
=
0
;
i
<
ndim
;
++
i
)
{
this
->
shape
[
i
]
=
static_cast
<
int64_t
>
(
desc
->
dim
(
i
));
this
->
strides
[
i
]
=
desc
->
stride
(
i
);
this
->
strides
[
i
]
=
static_cast
<
int64_t
>
(
desc
->
stride
(
i
)
)
;
}
this
->
storageShape
=
inferStorageShape
(
this
->
shape
,
this
->
strides
);
this
->
dataType
=
toAclDataType
(
desc
->
dtype
());
...
...
src/infiniop/ops/causal_softmax/ascend/causal_softmax_aclnn.cc
View file @
98270602
...
...
@@ -97,7 +97,8 @@ infiniStatus_t Descriptor::create(
CHECK_ACL
(
aclnnSoftmaxGetWorkspaceSize
(
tx
,
dim
,
ty
,
&
workspacesize_softmax
,
&
executor
));
// Create the descriptor
size_t
all_workspacesize
=
workspacesize_softmax
+
workspacesize_mask
;
size_t
all_workspacesize
=
std
::
max
(
workspacesize_softmax
,
workspacesize_mask
);
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
x
,
mask
,
y
,
value
,
mask_addr
,
value_addr
},
std
::
move
(
info
),
all_workspacesize
,
handle_ascend
->
device
,
handle_ascend
->
device_id
);
...
...
@@ -127,7 +128,6 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size, voi
AclSetTensorAddr
(
mask_executor
,
2
,
tvalue
,
_opaque
->
value_addr
);
CHECK_ACL
(
aclnnInplaceMaskedFillTensorGetWorkspaceSize
(
tx
,
tmask
,
tvalue
,
&
workspacesize_mask
,
&
mask_executor
));
CHECK_ACL
(
aclnnInplaceMaskedFillTensor
(
workspace
,
workspacesize_mask
,
mask_executor
,
stream
));
CHECK_ACL
(
aclrtSynchronizeStream
(
stream
));
AclSetTensorAddr
(
executor
,
0
,
tx
,
(
void
*
)
x
);
AclSetTensorAddr
(
executor
,
1
,
ty
,
y
);
...
...
src/infiniop/ops/rearrange/ascend/rearrange_ascend.cc
View file @
98270602
...
...
@@ -5,10 +5,16 @@
namespace
op
::
rearrange
::
ascend
{
// Per-descriptor state for the Ascend rearrange operator.
// Owns the aclnn tensor descriptors and the device workspace used by
// aclnnInplaceCopy; all owned resources are released in the destructor.
struct Descriptor::Opaque {
    aclDataType dt;                   // acl element type (from toAclDataType in create)
    std::vector<int64_t> shape;       // logical tensor shape
    std::vector<int64_t> dst_strides; // destination strides
    std::vector<int64_t> src_strides; // source strides
    aclnnTensorDescriptor_t dst;      // owned descriptor for the destination tensor
    aclnnTensorDescriptor_t src;      // owned descriptor for the source tensor
    void *workspace;                  // aclnnInplaceCopy workspace
    uint64_t workspace_size;          // size in bytes of `workspace`
    // Release owned descriptors and the device workspace.
    // NOTE(review): aclrtFree is called unconditionally — confirm it tolerates
    // a nullptr workspace (workspace_size == 0 path in create()).
    ~Opaque() {
        delete dst;
        delete src;
        aclrtFree(workspace);
    }
};
Descriptor
::~
Descriptor
()
{
...
...
@@ -37,24 +43,31 @@ infiniStatus_t Descriptor::create(
auto
result
=
utils
::
RearrangeMeta
::
create
(
shape
.
data
(),
dst_strides
.
data
(),
src_strides
.
data
(),
ndim
,
element_size
);
CHECK_RESULT
(
result
);
std
::
vector
<
int64_t
>
shape_
(
ndim
);
std
::
vector
<
int64_t
>
dst_strides_
(
ndim
);
std
::
vector
<
int64_t
>
src_strides_
(
ndim
);
for
(
size_t
i
=
0
;
i
<
ndim
;
i
++
)
{
shape_
[
i
]
=
static_cast
<
int64_t
>
(
shape
[
i
]);
dst_strides_
[
i
]
=
static_cast
<
int64_t
>
(
dst_strides
[
i
]);
src_strides_
[
i
]
=
static_cast
<
int64_t
>
(
src_strides
[
i
]);
aclnnTensorDescriptor_t
dst
=
new
aclnnTensorDescriptor
(
y_desc
);
aclnnTensorDescriptor_t
src
=
new
aclnnTensorDescriptor
(
x_desc
);
uint64_t
workspace_size
=
0
;
aclOpExecutor
*
executor
=
nullptr
;
void
*
workspace
=
nullptr
;
aclnnInplaceCopyGetWorkspaceSize
(
dst
->
tensor
,
src
->
tensor
,
&
workspace_size
,
&
executor
);
if
(
workspace_size
!=
0
)
{
CHECK_ACL
(
aclrtMalloc
(
&
workspace
,
workspace_size
,
ACL_MEM_MALLOC_HUGE_FIRST
));
}
*
desc_ptr
=
new
Descriptor
(
result
.
take
(),
new
Opaque
{
toAclDataType
(
dtype
)
,
s
hape_
,
dst_strides_
,
src_strides_
},
dst
,
s
rc
,
workspace
,
workspace_size
},
handle
->
device
,
handle
->
device_id
);
// Delete useless executor
aclDestroyAclOpExecutor
(
executor
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -62,20 +75,19 @@ infiniStatus_t Descriptor::calculate(
void
*
y
,
const
void
*
x
,
void
*
stream
)
const
{
auto
tdst
=
_opaque
->
dst
->
tensor
;
auto
tsrc
=
_opaque
->
src
->
tensor
;
auto
y_
=
aclnnTensorDescriptor
(
_opaque
->
dt
,
_opaque
->
shape
,
_opaque
->
dst_strides
,
y
);
auto
x_
=
aclnnTensorDescriptor
(
_opaque
->
dt
,
_opaque
->
shape
,
_opaque
->
src_strides
,
(
void
*
)
x
);
auto
ty
=
y_
.
tensor
;
auto
tx
=
x_
.
tensor
;
size_t
workspace_size
=
0
;
uint64_t
workspace_size
=
0
;
aclOpExecutor
*
executor
=
nullptr
;
void
*
workspace
=
nullptr
;
CHECK_ACL
(
aclnnInplaceCopyGetWorkspaceSize
(
ty
,
tx
,
&
workspace_size
,
&
executor
));
if
(
workspace_size
!=
0
)
{
CHECK_ACL
(
aclrtMalloc
(
&
workspace
,
workspace_size
,
ACL_MEM_MALLOC_HUGE_FIRST
));
}
CHECK_ACL
(
aclnnInplaceCopy
(
workspace
,
workspace_size
,
executor
,
stream
));
AclSetTensorAddr
(
executor
,
0
,
tdst
,
y
);
AclSetTensorAddr
(
executor
,
1
,
tsrc
,
(
void
*
)
x
);
CHECK_ACL
(
aclnnInplaceCopyGetWorkspaceSize
(
tdst
,
tsrc
,
&
workspace_size
,
&
executor
));
// Execute InplaceCopy
CHECK_ACL
(
aclnnInplaceCopy
(
_opaque
->
workspace
,
_opaque
->
workspace_size
,
executor
,
stream
));
return
INFINI_STATUS_SUCCESS
;
}
...
...
test/infiniop/causal_softmax.py
View file @
98270602
...
...
@@ -37,7 +37,7 @@ _TENSOR_DTYPES = [torch.float16]
# Tolerance map for different data types
_TOLERANCE_MAP
=
{
torch
.
float16
:
{
"atol"
:
0
,
"rtol"
:
1e-2
},
torch
.
float16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-2
},
}
...
...
@@ -143,6 +143,9 @@ def test(
)
lib_causal_softmax
()
if
sync
is
not
None
:
sync
()
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
if
DEBUG
:
...
...
test/infiniop/libinfiniop/utils.py
View file @
98270602
...
...
@@ -476,10 +476,11 @@ def get_test_devices(args):
def get_sync_func(device):
    """Return the torch synchronize function for `device`, or None for CPU.

    `device` is an infiniDeviceEnum value; it is translated to the torch
    device-module name (e.g. "cuda") via `infiniDeviceEnum_str_map` before
    comparison — comparing the raw enum against "cpu" would never match.
    """
    import torch

    device_str = infiniDeviceEnum_str_map[device]
    if device_str == "cpu":
        # CPU execution is synchronous; nothing to wait on.
        sync = None
    else:
        # e.g. torch.cuda.synchronize for a "cuda" device string.
        sync = getattr(torch, device_str).synchronize
    return sync
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment