Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
79b2cc28
Commit
79b2cc28
authored
Mar 12, 2020
by
Thor Johnsen
Browse files
Bug fix
parent
bd6b1ebc
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
13 deletions
+11
-13
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
+11
-13
No files found.
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
View file @
79b2cc28
...
...
@@ -79,7 +79,7 @@ __global__ void adam_cuda_kernel(
T
pi
[
ILP
];
T
gi
[
ILP
];
bool
overflow
=
F
alse
;
bool
overflow
=
f
alse
;
for
(
int
j_start
=
0
;
j_start
<
tsize
;
j_start
+=
totThreads
*
ILP
)
{
#pragma unroll
for
(
int
ii
=
0
;
ii
<
ILP
;
ii
++
)
{
...
...
@@ -99,7 +99,6 @@ __global__ void adam_cuda_kernel(
#pragma unroll
for
(
int
ii
=
0
;
ii
<
ILP
;
ii
++
)
{
int
j
=
j_start
+
i
*
ILP
;
T
scaled_grad
=
gi
[
ii
]
/
grad_scale
;
if
(
isfinite
(
scaled_grad
))
{
mi
[
ii
]
=
b1
*
mi
[
ii
]
+
(
1
-
b1
)
*
scaled_grad
;
...
...
@@ -112,7 +111,7 @@ __global__ void adam_cuda_kernel(
float
update
=
(
mi
[
ii
]
/
denom
)
+
(
decay
*
pi
[
ii
]);
pi
[
ii
]
=
pi
[
ii
]
-
(
step_size
*
update
);
}
else
{
overflow
=
T
rue
;
overflow
=
t
rue
;
}
}
...
...
@@ -137,7 +136,7 @@ __global__ void adam_cuda_kernel(
}
template
<
typename
T
,
typename
GRAD_T
>
__global__
__device__
void
adam_undo_cuda_kernel
(
__global__
void
adam_undo_cuda_kernel
(
T
*
__restrict__
p
,
T
*
__restrict__
m
,
T
*
__restrict__
v
,
...
...
@@ -182,7 +181,6 @@ __global__ __device__ void adam_undo_cuda_kernel(
#pragma unroll
for
(
int
ii
=
0
;
ii
<
ILP
;
ii
++
)
{
int
j
=
j_start
+
i
*
ILP
;
T
scaled_grad
=
gi
[
ii
]
/
grad_scale
;
if
(
isfinite
(
scaled_grad
))
{
float
denom
;
...
...
@@ -195,7 +193,7 @@ __global__ __device__ void adam_undo_cuda_kernel(
vi
[
ii
]
=
(
vi
[
ii
]
-
(
1
-
b2
)
*
scaled_grad
*
scaled_grad
)
/
b2
;
// Make sure round off errors don't create (small) negative value.
// This can happen if we have to revert the very first step.
vi
i
[
ii
]
=
vi
i
[
i
]
>=
0.0
f
?
vi
[
ii
]
:
0.0
f
;
vi
[
ii
]
=
vi
[
i
i
]
>=
0.0
f
?
vi
[
ii
]
:
0.0
f
;
}
}
...
...
@@ -252,7 +250,7 @@ struct AdamFunctor
T
pi
[
ILP
];
T
gi
[
ILP
];
bool
overflow
=
F
alse
;
bool
overflow
=
f
alse
;
for
(
int
j_start
=
0
;
j_start
<
dim
;
j_start
+=
blockDim
.
x
*
ILP
)
{
#pragma unroll
for
(
int
ii
=
0
;
ii
<
ILP
;
ii
++
)
{
...
...
@@ -262,7 +260,7 @@ struct AdamFunctor
gi
[
ii
]
=
GRAD_T
(
0
);
int
j
=
j_start
+
threadIdx
.
x
+
ii
*
blockDim
.
x
;
if
(
j
<
tsize
)
{
if
(
j
<
dim
)
{
pi
[
ii
]
=
p
[
j
];
mi
[
ii
]
=
m
[
j
];
vi
[
ii
]
=
v
[
j
];
...
...
@@ -285,14 +283,14 @@ struct AdamFunctor
float
update
=
(
mi
[
ii
]
/
denom
)
+
(
decay
*
pi
[
ii
]);
pi
[
ii
]
=
pi
[
ii
]
-
(
step_size
*
update
);
}
else
{
overflow
=
T
rue
;
overflow
=
t
rue
;
}
}
#pragma unroll
for
(
int
ii
=
0
;
ii
<
ILP
;
ii
++
)
{
int
j
=
j_start
+
threadIdx
.
x
+
ii
*
blockDim
.
x
;
if
(
j
<
tsize
)
{
if
(
j
<
dim
)
{
m
[
j
]
=
mi
[
ii
];
v
[
j
]
=
vi
[
ii
];
p
[
j
]
=
pi
[
ii
];
...
...
@@ -352,7 +350,7 @@ struct AdamUndoFunctor
gi
[
ii
]
=
GRAD_T
(
0
);
int
j
=
j_start
+
threadIdx
.
x
+
ii
*
blockDim
.
x
;
if
(
j
<
tsize
)
{
if
(
j
<
dim
)
{
pi
[
ii
]
=
p
[
j
];
mi
[
ii
]
=
m
[
j
];
vi
[
ii
]
=
v
[
j
];
...
...
@@ -375,14 +373,14 @@ struct AdamUndoFunctor
vi
[
ii
]
=
(
vi
[
ii
]
-
(
1
-
b2
)
*
scaled_grad
*
scaled_grad
)
/
b2
;
// Make sure round off errors don't create (small) negative value.
// This can happen if we have to revert the very first step.
vi
i
[
ii
]
=
vi
i
[
i
]
>=
0.0
f
?
vi
[
ii
]
:
0.0
f
;
vi
[
ii
]
=
vi
[
i
i
]
>=
0.0
f
?
vi
[
ii
]
:
0.0
f
;
}
}
#pragma unroll
for
(
int
ii
=
0
;
ii
<
ILP
;
ii
++
)
{
int
j
=
j_start
+
threadIdx
.
x
+
ii
*
blockDim
.
x
;
if
(
j
<
tsize
)
{
if
(
j
<
dim
)
{
m
[
j
]
=
mi
[
ii
];
v
[
j
]
=
vi
[
ii
];
p
[
j
]
=
pi
[
ii
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment