Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
torch-sparse
Commits
d1d4ec3c
"...text-generation-inference.git" did not exist on "dbb23fbfa868ad8f961c60896e346fad3d2ab440"
Commit
d1d4ec3c
authored
Nov 09, 2020
by
rusty1s
Browse files
parallel convert on CPU, fix bug for nnz=0
parent
99117398
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
52 additions
and
17 deletions
+52
-17
csrc/cpu/convert_cpu.cpp
csrc/cpu/convert_cpu.cpp
+31
-17
csrc/cuda/convert_cuda.cu
csrc/cuda/convert_cuda.cu
+4
-0
test/test_storage.py
test/test_storage.py
+17
-0
No files found.
csrc/cpu/convert_cpu.cpp
View file @
d1d4ec3c
#include "convert_cpu.h"
#include <ATen/Parallel.h>
#include "utils.h"
// Convert a sorted COO row-index vector `ind` into a CSR row-pointer
// vector of length M + 1.
//
// Parameters:
//   ind - 1-D int64 tensor of non-decreasing indices in [0, M).
//   M   - number of rows; the result has M + 1 entries.
// Returns: int64 tensor `out` where out[i] is the number of entries with
//   index < i; in particular out[M] == ind.numel().
torch::Tensor ind2ptr_cpu(torch::Tensor ind, int64_t M) {
  // NOTE(review): the allocation line is folded out of the diff view; the
  // CUDA sibling allocates torch::empty(M + 1, ind.options()) — assumed
  // identical here. TODO confirm against the full file.
  auto out = torch::empty(M + 1, ind.options());
  auto ind_data = ind.data_ptr<int64_t>();
  auto out_data = out.data_ptr<int64_t>();

  int64_t numel = ind.numel();

  // nnz == 0 fix: an empty index vector yields an all-zero pointer vector
  // (previously ind_data[0] was read unconditionally).
  if (numel == 0)
    return out.zero_();

  // Rows up to and including the first populated one start at offset 0.
  for (int64_t i = 0; i <= ind_data[0]; i++)
    out_data[i] = 0;

  // Parallel fill of the interior pointers. Each chunk re-derives its
  // running index from ind_data[begin], so chunks are independent; the
  // last element is excluded (i < numel - 1) because it has no successor.
  int64_t grain_size = at::internal::GRAIN_SIZE;
  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
    int64_t idx = ind_data[begin], next_idx;
    for (int64_t i = begin; i < std::min(end, numel - 1); i++) {
      next_idx = ind_data[i + 1];
      for (; idx < next_idx; idx++)
        out_data[idx + 1] = i + 1;
    }
  });

  // Rows after the last populated one all end at numel.
  for (int64_t i = ind_data[numel - 1] + 1; i < M + 1; i++)
    out_data[i] = numel;

  return out;
}
...
...
@@ -31,13 +40,18 @@ torch::Tensor ptr2ind_cpu(torch::Tensor ptr, int64_t E) {
auto
ptr_data
=
ptr
.
data_ptr
<
int64_t
>
();
auto
out_data
=
out
.
data_ptr
<
int64_t
>
();
int64_t
idx
=
ptr_data
[
0
],
next_idx
;
for
(
int64_t
i
=
0
;
i
<
ptr
.
numel
()
-
1
;
i
++
)
{
next_idx
=
ptr_data
[
i
+
1
];
for
(
int64_t
e
=
idx
;
e
<
next_idx
;
e
++
)
out_data
[
e
]
=
i
;
idx
=
next_idx
;
}
int64_t
numel
=
ptr
.
numel
();
int64_t
grain_size
=
at
::
internal
::
GRAIN_SIZE
;
at
::
parallel_for
(
0
,
numel
-
1
,
grain_size
,
[
&
](
int64_t
begin
,
int64_t
end
)
{
int64_t
idx
=
ptr_data
[
begin
],
next_idx
;
for
(
int64_t
i
=
begin
;
i
<
end
;
i
++
)
{
next_idx
=
ptr_data
[
i
+
1
];
for
(
int64_t
e
=
idx
;
e
<
next_idx
;
e
++
)
out_data
[
e
]
=
i
;
idx
=
next_idx
;
}
});
return
out
;
}
csrc/cuda/convert_cuda.cu
View file @
d1d4ec3c
...
...
@@ -28,6 +28,10 @@ torch::Tensor ind2ptr_cuda(torch::Tensor ind, int64_t M) {
cudaSetDevice
(
ind
.
get_device
());
auto
out
=
torch
::
empty
(
M
+
1
,
ind
.
options
());
if
(
ind
.
numel
()
==
0
)
return
out
.
zero_
();
auto
ind_data
=
ind
.
data_ptr
<
int64_t
>
();
auto
out_data
=
out
.
data_ptr
<
int64_t
>
();
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
...
...
test/test_storage.py
View file @
d1d4ec3c
...
...
@@ -7,6 +7,23 @@ from torch_sparse.storage import SparseStorage
from
.utils
import
dtypes
,
devices
,
tensor
@pytest.mark.parametrize('device', devices)
def test_ind2ptr(device):
    # Round-trip between COO row indices and CSR row pointers.
    row = tensor([2, 2, 4, 5, 5, 6], torch.long, device)
    rowptr = torch.ops.torch_sparse.ind2ptr(row, 8)
    assert rowptr.tolist() == [0, 0, 0, 2, 2, 3, 5, 6, 6]

    row = torch.ops.torch_sparse.ptr2ind(rowptr, 6)
    assert row.tolist() == [2, 2, 4, 5, 5, 6]

    # Empty input (nnz == 0): the pointer vector must be all zeros.
    row = tensor([], torch.long, device)
    rowptr = torch.ops.torch_sparse.ind2ptr(row, 8)
    assert rowptr.tolist() == [0] * 9

    row = torch.ops.torch_sparse.ptr2ind(rowptr, 0)
    assert row.tolist() == []
@
pytest
.
mark
.
parametrize
(
'dtype,device'
,
product
(
dtypes
,
devices
))
def
test_storage
(
dtype
,
device
):
row
,
col
=
tensor
([[
0
,
0
,
1
,
1
],
[
0
,
1
,
0
,
1
]],
torch
.
long
,
device
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment