Commit fd79c680 authored by Valentin Andrei, committed by Facebook GitHub Bot

Enforce torch.float32 for ms_deform_attn when using AMP

Reviewed By: stephenyan1231

Differential Revision: D30225977

fbshipit-source-id: 479b96acc7f90a8ee2373ab44112e21086e9d1d2
parent cb985322
@@ -16,12 +16,22 @@ import torch
 import torch.nn.functional as F
 from torch.autograd import Function
 from torch.autograd.function import once_differentiable
+from torch.cuda.amp.autocast_mode import custom_bwd, custom_fwd
 from detr import _C as MSDA
 class MSDeformAttnFunction(Function):
+    # The @custom_fwd and @custom_bwd decorators are used in this case to allow enabling of
+    # Automatic Mixed Precision when we do not have implementations of custom CUDA kernels for
+    # all the precision types.
+    #
+    # TODO: After implementing `ms_deform_attn` CUDA kernels for FP16, we can remove the
+    # custom_fwd and custom_bwd decorators
     @staticmethod
+    @custom_fwd(cast_inputs=torch.float32)
     def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
         ctx.im2col_step = im2col_step
         output = MSDA.ms_deform_attn_forward(
@@ -31,6 +41,7 @@ class MSDeformAttnFunction(Function):
     @staticmethod
     @once_differentiable
+    @custom_bwd
     def backward(ctx, grad_output):
         value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
         grad_value, grad_sampling_loc, grad_attn_weight = \
...
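For reference, a minimal sketch (not part of the patch) of how torch.cuda.amp.custom_fwd(cast_inputs=torch.float32) and custom_bwd keep a custom autograd Function running in float32 inside an autocast region. The SquareFP32 op and its arithmetic are invented for illustration, and the snippet assumes a CUDA device:

    import torch
    from torch.autograd import Function
    from torch.cuda.amp import custom_bwd, custom_fwd

    class SquareFP32(Function):
        # Toy op standing in for a kernel that only supports float32.
        @staticmethod
        @custom_fwd(cast_inputs=torch.float32)
        def forward(ctx, x):
            # Inside an autocast region, x has already been cast to float32
            # and autocast is disabled for the body of forward.
            ctx.save_for_backward(x)
            return x * x

        @staticmethod
        @custom_bwd
        def backward(ctx, grad_output):
            # custom_bwd replays the autocast state of forward, so the
            # gradient math also runs in float32.
            (x,) = ctx.saved_tensors
            return 2.0 * x * grad_output

    x = torch.randn(8, device="cuda", requires_grad=True)
    with torch.cuda.amp.autocast():
        y = SquareFP32.apply(x)   # runs in float32 even though autocast is on
    assert y.dtype == torch.float32
    y.sum().backward()

Casting the inputs at the Function boundary avoids touching the CUDA kernels themselves, which is why the TODO in the diff can remove the decorators once FP16 `ms_deform_attn` kernels exist.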