Fix torch.fx import issue for torch 1.12 (#27570)

* Fix torch.fx import issue for torch 1.12 * Fix up * Python verion dependent import * Woops - fix * Fix

Fix torch.fx import issue for torch 1.12 (#27570)
* Fix torch.fx import issue for torch 1.12 * Fix up * Python verion dependent import * Woops - fix * Fix
38e2633f · amyeroberts · GitHub · f18c95b4 · 38e2633f · 38e2633f
Unverified Commit 38e2633f authored Nov 20, 2023 by amyeroberts Committed by GitHub Nov 20, 2023
3 changed files
--- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -36,6 +36,7 @@ from ...modeling_outputs import (
    TokenClassifierOutput,
 )
 from ...modeling_utils import PreTrainedModel
+from ...pytorch_utils import is_torch_greater_or_equal_than_1_13
 from ...utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
@@ -55,6 +56,9 @@ if is_flash_attn_2_available():
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
 if is_torch_fx_available():
+    if not is_torch_greater_or_equal_than_1_13:
+        import torch.fx
    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)

--- a/src/transformers/models/llama/modeling_llama.py
+++ b/src/transformers/models/llama/modeling_llama.py
@@ -32,7 +32,7 @@ from ...activations import ACT2FN
 from ...modeling_attn_mask_utils import AttentionMaskConverter, _prepare_4d_causal_attention_mask
 from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
 from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import ALL_LAYERNORM_LAYERS
+from ...pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
 from ...utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
@@ -52,6 +52,9 @@ if is_flash_attn_2_available():
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
 if is_torch_fx_available():
+    if not is_torch_greater_or_equal_than_1_13:
+        import torch.fx
    _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)

--- a/src/transformers/pytorch_utils.py
+++ b/src/transformers/pytorch_utils.py
@@ -30,6 +30,7 @@ parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_
 is_torch_greater_or_equal_than_2_1 = parsed_torch_version_base >= version.parse("2.1")
 is_torch_greater_or_equal_than_2_0 = parsed_torch_version_base >= version.parse("2.0")
+is_torch_greater_or_equal_than_1_13 = parsed_torch_version_base >= version.parse("1.13")
 is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
 is_torch_greater_or_equal_than_1_11 = parsed_torch_version_base >= version.parse("1.11")
 is_torch_less_than_1_11 = parsed_torch_version_base < version.parse("1.11")