chenpangpang/transformers · Commit 934d0b8b (unverified)
Authored Mar 06, 2023 by saswatmeher; committed via GitHub on Mar 06, 2023

Fix bert issue (#21963)

Co-authored-by: saswatmeher <saswatmeher@cse.iitb.ac.in>

Parent: 0bb17295
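In each of the touched encoders, the gradient-checkpointing/`use_cache` guard used to run inside the per-layer loop; this commit moves it above the loop, so the warning is emitted once and `use_cache` is already disabled before `next_decoder_cache` is initialised. Below is a minimal, hypothetical sketch of that pattern only, not the Transformers code itself: `ToyEncoder` and its linear layers are stand-ins for the encoder classes in the diff, and `warnings.warn` stands in for `logger.warning_once`.

# Hypothetical sketch of the pattern applied by this commit.
import warnings

import torch
from torch import nn
from torch.utils.checkpoint import checkpoint


class ToyEncoder(nn.Module):
    def __init__(self, num_layers: int = 4, hidden: int = 8):
        super().__init__()
        self.layer = nn.ModuleList([nn.Linear(hidden, hidden) for _ in range(num_layers)])
        self.gradient_checkpointing = True

    def forward(self, hidden_states: torch.Tensor, use_cache: bool = True):
        # The guard now runs once, before the loop, so the warning fires a single
        # time and use_cache is already False when the cache tuple is created.
        if self.gradient_checkpointing and self.training:
            if use_cache:
                warnings.warn(
                    "`use_cache=True` is incompatible with gradient checkpointing. "
                    "Setting `use_cache=False`..."
                )
                use_cache = False

        next_decoder_cache = () if use_cache else None

        for layer_module in self.layer:
            if self.gradient_checkpointing and self.training:
                # Checkpointed call; no per-layer use_cache check is needed any more.
                hidden_states = checkpoint(layer_module, hidden_states, use_reentrant=False)
            else:
                hidden_states = layer_module(hidden_states)
            if use_cache:
                next_decoder_cache = next_decoder_cache + (hidden_states,)

        return hidden_states, next_decoder_cache


if __name__ == "__main__":
    enc = ToyEncoder().train()
    _, cache = enc(torch.randn(2, 8, requires_grad=True))
    assert cache is None          # caching disabled before the loop while checkpointing
    _, cache = enc.eval()(torch.randn(2, 8))
    assert cache is not None      # caching works normally in eval mode

In the real encoders the checkpointed call still goes through create_custom_forward and torch.utils.checkpoint, as the hunks below show; the sketch only isolates where the guard sits relative to the loop.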
Changes: 20 changed files with 140 additions and 100 deletions (+140 -100)
src/transformers/models/align/modeling_align.py  +7 -5
src/transformers/models/altclip/modeling_altclip.py  +7 -5
src/transformers/models/bert/modeling_bert.py  +7 -5
src/transformers/models/bert_generation/modeling_bert_generation.py  +7 -5
src/transformers/models/bridgetower/modeling_bridgetower.py  +7 -5
src/transformers/models/camembert/modeling_camembert.py  +7 -5
src/transformers/models/chinese_clip/modeling_chinese_clip.py  +7 -5
src/transformers/models/clap/modeling_clap.py  +7 -5
src/transformers/models/data2vec/modeling_data2vec_text.py  +7 -5
src/transformers/models/electra/modeling_electra.py  +7 -5
src/transformers/models/ernie/modeling_ernie.py  +7 -5
src/transformers/models/layoutlm/modeling_layoutlm.py  +7 -5
src/transformers/models/markuplm/modeling_markuplm.py  +7 -5
src/transformers/models/nezha/modeling_nezha.py  +7 -5
src/transformers/models/realm/modeling_realm.py  +7 -5
src/transformers/models/roberta/modeling_roberta.py  +7 -5
src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py  +7 -5
src/transformers/models/roc_bert/modeling_roc_bert.py  +7 -5
src/transformers/models/splinter/modeling_splinter.py  +7 -5
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py  +7 -5
src/transformers/models/align/modeling_align.py

@@ -1077,6 +1077,13 @@ class AlignTextEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -1086,11 +1093,6 @@ class AlignTextEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/altclip/modeling_altclip.py

@@ -628,6 +628,13 @@ class AltRobertaEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -637,11 +644,6 @@ class AltRobertaEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/bert/modeling_bert.py

@@ -575,6 +575,13 @@ class BertEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -584,11 +591,6 @@ class BertEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/bert_generation/modeling_bert_generation.py

@@ -385,6 +385,13 @@ class BertEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -394,11 +401,6 @@ class BertEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/bridgetower/modeling_bridgetower.py

@@ -760,6 +760,13 @@ class BridgeTowerTextEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -769,11 +776,6 @@ class BridgeTowerTextEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/camembert/modeling_camembert.py

@@ -506,6 +506,13 @@ class CamembertEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -515,11 +522,6 @@ class CamembertEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/chinese_clip/modeling_chinese_clip.py

@@ -891,6 +891,13 @@ class ChineseCLIPTextEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -900,11 +907,6 @@ class ChineseCLIPTextEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/clap/modeling_clap.py

@@ -1578,6 +1578,13 @@ class ClapTextEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -1587,11 +1594,6 @@ class ClapTextEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/data2vec/modeling_data2vec_text.py

@@ -492,6 +492,13 @@ class Data2VecTextEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -501,11 +508,6 @@ class Data2VecTextEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/electra/modeling_electra.py

@@ -553,6 +553,13 @@ class ElectraEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -562,11 +569,6 @@ class ElectraEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/ernie/modeling_ernie.py

@@ -488,6 +488,13 @@ class ErnieEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -497,11 +504,6 @@ class ErnieEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/layoutlm/modeling_layoutlm.py

@@ -469,6 +469,13 @@ class LayoutLMEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -478,11 +485,6 @@ class LayoutLMEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/markuplm/modeling_markuplm.py

@@ -630,6 +630,13 @@ class MarkupLMEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -639,11 +646,6 @@ class MarkupLMEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/nezha/modeling_nezha.py

@@ -561,6 +561,13 @@ class NezhaEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -570,11 +577,6 @@ class NezhaEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/realm/modeling_realm.py

@@ -568,6 +568,13 @@ class RealmEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -577,11 +584,6 @@ class RealmEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/roberta/modeling_roberta.py

@@ -492,6 +492,13 @@ class RobertaEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -501,11 +508,6 @@ class RobertaEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py

@@ -494,6 +494,13 @@ class RobertaPreLayerNormEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -503,11 +510,6 @@ class RobertaPreLayerNormEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/roc_bert/modeling_roc_bert.py

@@ -626,6 +626,13 @@ class RoCBertEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -635,11 +642,6 @@ class RoCBertEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/splinter/modeling_splinter.py

@@ -441,6 +441,13 @@ class SplinterEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -450,11 +457,6 @@ class SplinterEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py

@@ -493,6 +493,13 @@ class XLMRobertaEncoder(nn.Module):
         all_self_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
 
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
         next_decoder_cache = () if use_cache else None
         for i, layer_module in enumerate(self.layer):
             if output_hidden_states:
@@ -502,11 +509,6 @@ class XLMRobertaEncoder(nn.Module):
             past_key_value = past_key_values[i] if past_key_values is not None else None
 
             if self.gradient_checkpointing and self.training:
-                if use_cache:
-                    logger.warning_once(
-                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
-                    )
-                    use_cache = False
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):