OpenDAS / FAST-RNNT · Commits

Commit 86e3a617, authored Jul 01, 2021 by Daniel Povey
parent 92814db2

Fix some bugs..
Showing 3 changed files with 14 additions and 2 deletions.
torch_integrated_conv/integrated_conv.py              +4 −2
torch_integrated_conv/integrated_conv_cuda_kernel.cu  +2 −0
torch_integrated_conv/integrated_conv_test.py         +8 −0
torch_integrated_conv/integrated_conv.py
```diff
@@ -62,11 +62,13 @@ def _integrated_conv_backward_dispatcher(input: torch.Tensor,
     if input.is_cuda:
         if torch_integrated_conv_cuda is None:
             raise EnvironmentError(f'Failed to load native CUDA module')
         # Actually it's not a hard requirement that these things be contiguous.
         return tuple(torch_integrated_conv_cuda.integrated_conv_backward_cuda(
-            input.contiguous(), pos_add.contiguous(), pos_mul.contiguous()))
+            input.contiguous(), pos_add.contiguous(), pos_mul.contiguous(),
+            grad_output))
     else:
         return tuple(torch_integrated_conv_cpu.integrated_conv_backward_cpu(
-            input, pos_add, pos_mul))
+            input, pos_add, pos_mul,
+            grad_output))
```
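The fix threads `grad_output` through to the native backward kernels; without it, the C++/CUDA backward code has no way to chain the incoming gradient into the gradients it returns. As a rough illustration only, here is a minimal sketch of the kind of `torch.autograd.Function` wrapper that would call this dispatcher. The class name, the forward-dispatcher name, and the exact argument order are assumptions for illustration, not the repository's actual code, and the sketch assumes the dispatchers defined in `integrated_conv.py` are in scope.

```python
import torch

# Hypothetical sketch (names and argument order assumed, not the repo's code):
# how an autograd.Function would route grad_output into the backward dispatcher.
class _IntegratedConvFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, pos_add, pos_mul):
        ctx.save_for_backward(input, pos_add, pos_mul)
        # Assumed forward dispatcher, analogous to the backward one above.
        return _integrated_conv_forward_dispatcher(input, pos_add, pos_mul)

    @staticmethod
    def backward(ctx, grad_output):
        input, pos_add, pos_mul = ctx.saved_tensors
        # The point of this commit: grad_output must be handed to the native
        # kernels so they can produce grad_input, grad_pos_add, grad_pos_mul.
        return _integrated_conv_backward_dispatcher(input, pos_add, pos_mul,
                                                    grad_output)
```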
torch_integrated_conv/integrated_conv_cuda_kernel.cu
```diff
@@ -212,6 +212,8 @@ void integrated_conv_kernel(
       if (relu > 0.0)
         sum += relu * pos_mul_buf[pos_in_kernel];
     }
+    // Sync threads because src_img_buf is also used above.
+    __syncthreads();
     // Aggregate `sum` over threads
     sum = tiled_warp_reduce_sum(threads_per_opixel, src_img_buf, sum);
     if (threadIdx.x % threads_per_opixel == 0 && h < H && w < W) {
```
torch_integrated_conv/integrated_conv_test.py
```diff
@@ -19,11 +19,19 @@ def test_integrated_conv_zeros():
     kW = 5
     pos_add = torch.zeros(C, kH, kW, device=device, dtype=dtype)
     pos_mul = torch.zeros(C, kH, kW, device=device, dtype=dtype)
+    input.requires_grad = True
+    pos_add.requires_grad = True
+    pos_mul.requires_grad = True
     output_ref = torch.zeros(N, C, H, W, device=device, dtype=dtype)
     output = integrated_conv(input, pos_add, pos_mul)
     assert torch.allclose(output, output_ref)
+    output.sum().backward()
+    print("input_grad=", input.grad)
+    print("pos_add_grad=", pos_add.grad)
+    print("pos_mul_grad=", pos_mul.grad)
+
 def test_integrated_conv_compare():
     N = 1
```
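The test change turns on gradients for all three tensors and runs a backward pass; with `pos_add` and `pos_mul` both zero the output is all zeros, so the prints are an eyeball check that the (newly fixed) backward dispatch returns something sensible. If one wanted that check to be automatic, a possible follow-up, not part of this commit, would be to assert on gradient shapes rather than print; the expected numerical values are not stated in the commit, so they are not asserted here.

```python
# Possible follow-up to the prints above (an assumption, not part of this
# commit): verify that backward populated gradients of the expected shapes.
assert input.grad is not None and input.grad.shape == input.shape
assert pos_add.grad is not None and pos_add.grad.shape == pos_add.shape
assert pos_mul.grad is not None and pos_mul.grad.shape == pos_mul.shape
```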