Commit 8be1b053 authored by Carl Case

bugfix: keep cache up-to-date on parameter require_grad-ness

parent 9cc74429
@@ -85,7 +85,15 @@ def cached_cast(cast_fn, x, cache):
     if is_nested(x):
         return type(x)([cached_cast(y) for y in x])
     if x in cache:
+        cached_x = cache[x]
+        # During eval, it's possible to end up caching casted weights
+        # with requires_grad == False. This is then a problem when they
+        # get reused on the next train iter. So we ensure that cached
+        # weights have the same requires_grad flag as the most recent request.
+        if x.requires_grad != cached_x.requires_grad:
+            cached_x.requires_grad_(x.requires_grad)
         return cache[x]
     casted_x = cast_fn(x)
     cache[x] = casted_x
     return casted_x
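
The scenario this patch addresses can be reproduced outside of apex. Below is a minimal, self-contained sketch of the same caching pattern (the name demo_cached_cast is hypothetical, not apex's API): a cast performed under torch.no_grad(), as in an eval pass, is cached as a leaf tensor with requires_grad == False, and without the flag sync the next training iteration would reuse a copy that cannot receive gradients.

import torch

def demo_cached_cast(cast_fn, x, cache):
    # Same pattern as the patched helper: cast x once, reuse the result,
    # and keep the cached copy's requires_grad flag in sync with x's.
    if x in cache:
        cached_x = cache[x]
        if x.requires_grad != cached_x.requires_grad:
            cached_x.requires_grad_(x.requires_grad)
        return cached_x
    casted_x = cast_fn(x)
    cache[x] = casted_x
    return casted_x

weight = torch.randn(4, 4, requires_grad=True)
cache = {}

# Eval-style pass: under no_grad the casted copy is created as a leaf
# tensor with requires_grad == False, and that is what lands in the cache.
with torch.no_grad():
    demo_cached_cast(lambda t: t.half(), weight, cache)
assert cache[weight].requires_grad is False

# Train-style pass: the sync restores the flag from the live parameter,
# so the reused half-precision copy can participate in autograd again.
half_weight = demo_cached_cast(lambda t: t.half(), weight, cache)
assert half_weight.requires_grad is True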