Unverified Commit 847e5691 authored by Dennis Loevlie, committed by GitHub

Fix: Change tensors to integers for torch.dynamo and torch.compile compatibility (#23475)

* Fix: Change tensors to integers in torch.split() for torch.dynamo and torch.compile compatibility

* Applied the suggested fix to the utils/check_copies.py test

* Applied the suggested fix by changing the original function that gets copied
parent 389bdba6
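
For context, a minimal sketch of the pattern this commit fixes (illustrative shapes, not the actual transformers code). Iterating over a tensor of `(height, width)` spatial shapes yields 0-d tensors, and `torch.dynamo` can fail to trace tensor-valued split sizes; calling `.item()` turns them into plain Python ints that `torch.split` and `torch.compile` handle cleanly:

```python
import torch

# Hypothetical shapes for illustration: two feature levels of 8x8 and 4x4,
# flattened and concatenated along dim 1, as in deformable attention.
value_spatial_shapes = torch.tensor([[8, 8], [4, 4]])
value = torch.randn(2, 8 * 8 + 4 * 4, 4, 16)  # (batch, sum(H*W), heads, head_dim)

# Before the fix: `height * width` is a 0-d tensor, which torch.dynamo may
# fail to trace when it is used as a split size.
# value_list = value.split([height * width for height, width in value_spatial_shapes], dim=1)

# After the fix: .item() converts each 0-d tensor into a plain Python int.
value_list = value.split(
    [height.item() * width.item() for height, width in value_spatial_shapes], dim=1
)
print([chunk.shape for chunk in value_list])  # sizes 64 and 16 along dim 1
```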
@@ -550,7 +550,7 @@ def multi_scale_deformable_attention(
 ) -> Tensor:
     batch_size, _, num_heads, hidden_dim = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
-    value_list = value.split([height * width for height, width in value_spatial_shapes], dim=1)
+    value_list = value.split([height.item() * width.item() for height, width in value_spatial_shapes], dim=1)
     sampling_grids = 2 * sampling_locations - 1
     sampling_value_list = []
     for level_id, (height, width) in enumerate(value_spatial_shapes):
...
@@ -453,7 +453,7 @@ def multi_scale_deformable_attention(
 ) -> Tensor:
     batch_size, _, num_heads, hidden_dim = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
-    value_list = value.split([height * width for height, width in value_spatial_shapes], dim=1)
+    value_list = value.split([height.item() * width.item() for height, width in value_spatial_shapes], dim=1)
     sampling_grids = 2 * sampling_locations - 1
     sampling_value_list = []
     for level_id, (height, width) in enumerate(value_spatial_shapes):
...
@@ -810,7 +810,7 @@ def multi_scale_deformable_attention(
 ) -> Tensor:
     batch_size, _, num_heads, hidden_dim = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
-    value_list = value.split([height * width for height, width in value_spatial_shapes], dim=1)
+    value_list = value.split([height.item() * width.item() for height, width in value_spatial_shapes], dim=1)
     sampling_grids = 2 * sampling_locations - 1
     sampling_value_list = []
     for level_id, (height, width) in enumerate(value_spatial_shapes):
@@ -1340,7 +1340,7 @@ class Mask2FormerPixelDecoder(nn.Module):
             else:
                 split_sizes[i] = last_hidden_state.shape[1] - level_start_index[i]
-        encoder_output = torch.split(last_hidden_state, split_sizes, dim=1)
+        encoder_output = torch.split(last_hidden_state, [size.item() for size in split_sizes], dim=1)
         # Compute final features
         outputs = [
...
@@ -66,7 +66,7 @@ def multi_scale_deformable_attention(
 ) -> Tensor:
     batch_size, _, num_heads, hidden_dim = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
-    value_list = value.split([height * width for height, width in value_spatial_shapes], dim=1)
+    value_list = value.split([height.item() * width.item() for height, width in value_spatial_shapes], dim=1)
     sampling_grids = 2 * sampling_locations - 1
     sampling_value_list = []
     for level_id, (height, width) in enumerate(value_spatial_shapes):
...
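
The Mask2FormerPixelDecoder hunk above applies the same idea where the split sizes are derived from `level_start_index`. A sketch with assumed toy shapes:

```python
import torch

# Assumed toy shapes: 80 tokens along dim 1 covering two feature levels
# that start at offsets 0 and 64.
last_hidden_state = torch.randn(2, 80, 256)  # (batch, sum(H*W), channels)
level_start_index = torch.tensor([0, 64])
num_levels = level_start_index.shape[0]

split_sizes = [None] * num_levels
for i in range(num_levels):
    if i < num_levels - 1:
        # Difference of two 0-d tensors is itself a 0-d tensor, not an int.
        split_sizes[i] = level_start_index[i + 1] - level_start_index[i]
    else:
        split_sizes[i] = last_hidden_state.shape[1] - level_start_index[i]

# .item() converts each 0-d tensor size to a Python int so torch.split
# traces cleanly under torch.compile.
encoder_output = torch.split(last_hidden_state, [size.item() for size in split_sizes], dim=1)
print([chunk.shape for chunk in encoder_output])  # sizes 64 and 16 along dim 1
```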