Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
934d0b8b
Unverified
Commit
934d0b8b
authored
Mar 06, 2023
by
saswatmeher
Committed by
GitHub
Mar 06, 2023
Browse files
Fix bert issue (#21963)
Co-authored-by:
saswatmeher
<
saswatmeher@cse.iitb.ac.in
>
parent
0bb17295
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
140 additions
and
100 deletions
+140
-100
src/transformers/models/align/modeling_align.py
src/transformers/models/align/modeling_align.py
+7
-5
src/transformers/models/altclip/modeling_altclip.py
src/transformers/models/altclip/modeling_altclip.py
+7
-5
src/transformers/models/bert/modeling_bert.py
src/transformers/models/bert/modeling_bert.py
+7
-5
src/transformers/models/bert_generation/modeling_bert_generation.py
...ormers/models/bert_generation/modeling_bert_generation.py
+7
-5
src/transformers/models/bridgetower/modeling_bridgetower.py
src/transformers/models/bridgetower/modeling_bridgetower.py
+7
-5
src/transformers/models/camembert/modeling_camembert.py
src/transformers/models/camembert/modeling_camembert.py
+7
-5
src/transformers/models/chinese_clip/modeling_chinese_clip.py
...transformers/models/chinese_clip/modeling_chinese_clip.py
+7
-5
src/transformers/models/clap/modeling_clap.py
src/transformers/models/clap/modeling_clap.py
+7
-5
src/transformers/models/data2vec/modeling_data2vec_text.py
src/transformers/models/data2vec/modeling_data2vec_text.py
+7
-5
src/transformers/models/electra/modeling_electra.py
src/transformers/models/electra/modeling_electra.py
+7
-5
src/transformers/models/ernie/modeling_ernie.py
src/transformers/models/ernie/modeling_ernie.py
+7
-5
src/transformers/models/layoutlm/modeling_layoutlm.py
src/transformers/models/layoutlm/modeling_layoutlm.py
+7
-5
src/transformers/models/markuplm/modeling_markuplm.py
src/transformers/models/markuplm/modeling_markuplm.py
+7
-5
src/transformers/models/nezha/modeling_nezha.py
src/transformers/models/nezha/modeling_nezha.py
+7
-5
src/transformers/models/realm/modeling_realm.py
src/transformers/models/realm/modeling_realm.py
+7
-5
src/transformers/models/roberta/modeling_roberta.py
src/transformers/models/roberta/modeling_roberta.py
+7
-5
src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py
...els/roberta_prelayernorm/modeling_roberta_prelayernorm.py
+7
-5
src/transformers/models/roc_bert/modeling_roc_bert.py
src/transformers/models/roc_bert/modeling_roc_bert.py
+7
-5
src/transformers/models/splinter/modeling_splinter.py
src/transformers/models/splinter/modeling_splinter.py
+7
-5
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py
+7
-5
No files found.
src/transformers/models/align/modeling_align.py
View file @
934d0b8b
...
@@ -1077,6 +1077,13 @@ class AlignTextEncoder(nn.Module):
...
@@ -1077,6 +1077,13 @@ class AlignTextEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -1086,11 +1093,6 @@ class AlignTextEncoder(nn.Module):
...
@@ -1086,11 +1093,6 @@ class AlignTextEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/altclip/modeling_altclip.py
View file @
934d0b8b
...
@@ -628,6 +628,13 @@ class AltRobertaEncoder(nn.Module):
...
@@ -628,6 +628,13 @@ class AltRobertaEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -637,11 +644,6 @@ class AltRobertaEncoder(nn.Module):
...
@@ -637,11 +644,6 @@ class AltRobertaEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/bert/modeling_bert.py
View file @
934d0b8b
...
@@ -575,6 +575,13 @@ class BertEncoder(nn.Module):
...
@@ -575,6 +575,13 @@ class BertEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -584,11 +591,6 @@ class BertEncoder(nn.Module):
...
@@ -584,11 +591,6 @@ class BertEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/bert_generation/modeling_bert_generation.py
View file @
934d0b8b
...
@@ -385,6 +385,13 @@ class BertEncoder(nn.Module):
...
@@ -385,6 +385,13 @@ class BertEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -394,11 +401,6 @@ class BertEncoder(nn.Module):
...
@@ -394,11 +401,6 @@ class BertEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/bridgetower/modeling_bridgetower.py
View file @
934d0b8b
...
@@ -760,6 +760,13 @@ class BridgeTowerTextEncoder(nn.Module):
...
@@ -760,6 +760,13 @@ class BridgeTowerTextEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -769,11 +776,6 @@ class BridgeTowerTextEncoder(nn.Module):
...
@@ -769,11 +776,6 @@ class BridgeTowerTextEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/camembert/modeling_camembert.py
View file @
934d0b8b
...
@@ -506,6 +506,13 @@ class CamembertEncoder(nn.Module):
...
@@ -506,6 +506,13 @@ class CamembertEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -515,11 +522,6 @@ class CamembertEncoder(nn.Module):
...
@@ -515,11 +522,6 @@ class CamembertEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/chinese_clip/modeling_chinese_clip.py
View file @
934d0b8b
...
@@ -891,6 +891,13 @@ class ChineseCLIPTextEncoder(nn.Module):
...
@@ -891,6 +891,13 @@ class ChineseCLIPTextEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -900,11 +907,6 @@ class ChineseCLIPTextEncoder(nn.Module):
...
@@ -900,11 +907,6 @@ class ChineseCLIPTextEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/clap/modeling_clap.py
View file @
934d0b8b
...
@@ -1578,6 +1578,13 @@ class ClapTextEncoder(nn.Module):
...
@@ -1578,6 +1578,13 @@ class ClapTextEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -1587,11 +1594,6 @@ class ClapTextEncoder(nn.Module):
...
@@ -1587,11 +1594,6 @@ class ClapTextEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/data2vec/modeling_data2vec_text.py
View file @
934d0b8b
...
@@ -492,6 +492,13 @@ class Data2VecTextEncoder(nn.Module):
...
@@ -492,6 +492,13 @@ class Data2VecTextEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -501,11 +508,6 @@ class Data2VecTextEncoder(nn.Module):
...
@@ -501,11 +508,6 @@ class Data2VecTextEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/electra/modeling_electra.py
View file @
934d0b8b
...
@@ -553,6 +553,13 @@ class ElectraEncoder(nn.Module):
...
@@ -553,6 +553,13 @@ class ElectraEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -562,11 +569,6 @@ class ElectraEncoder(nn.Module):
...
@@ -562,11 +569,6 @@ class ElectraEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/ernie/modeling_ernie.py
View file @
934d0b8b
...
@@ -488,6 +488,13 @@ class ErnieEncoder(nn.Module):
...
@@ -488,6 +488,13 @@ class ErnieEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -497,11 +504,6 @@ class ErnieEncoder(nn.Module):
...
@@ -497,11 +504,6 @@ class ErnieEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/layoutlm/modeling_layoutlm.py
View file @
934d0b8b
...
@@ -469,6 +469,13 @@ class LayoutLMEncoder(nn.Module):
...
@@ -469,6 +469,13 @@ class LayoutLMEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -478,11 +485,6 @@ class LayoutLMEncoder(nn.Module):
...
@@ -478,11 +485,6 @@ class LayoutLMEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/markuplm/modeling_markuplm.py
View file @
934d0b8b
...
@@ -630,6 +630,13 @@ class MarkupLMEncoder(nn.Module):
...
@@ -630,6 +630,13 @@ class MarkupLMEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -639,11 +646,6 @@ class MarkupLMEncoder(nn.Module):
...
@@ -639,11 +646,6 @@ class MarkupLMEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/nezha/modeling_nezha.py
View file @
934d0b8b
...
@@ -561,6 +561,13 @@ class NezhaEncoder(nn.Module):
...
@@ -561,6 +561,13 @@ class NezhaEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -570,11 +577,6 @@ class NezhaEncoder(nn.Module):
...
@@ -570,11 +577,6 @@ class NezhaEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/realm/modeling_realm.py
View file @
934d0b8b
...
@@ -568,6 +568,13 @@ class RealmEncoder(nn.Module):
...
@@ -568,6 +568,13 @@ class RealmEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -577,11 +584,6 @@ class RealmEncoder(nn.Module):
...
@@ -577,11 +584,6 @@ class RealmEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/roberta/modeling_roberta.py
View file @
934d0b8b
...
@@ -492,6 +492,13 @@ class RobertaEncoder(nn.Module):
...
@@ -492,6 +492,13 @@ class RobertaEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -501,11 +508,6 @@ class RobertaEncoder(nn.Module):
...
@@ -501,11 +508,6 @@ class RobertaEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py
View file @
934d0b8b
...
@@ -494,6 +494,13 @@ class RobertaPreLayerNormEncoder(nn.Module):
...
@@ -494,6 +494,13 @@ class RobertaPreLayerNormEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -503,11 +510,6 @@ class RobertaPreLayerNormEncoder(nn.Module):
...
@@ -503,11 +510,6 @@ class RobertaPreLayerNormEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/roc_bert/modeling_roc_bert.py
View file @
934d0b8b
...
@@ -626,6 +626,13 @@ class RoCBertEncoder(nn.Module):
...
@@ -626,6 +626,13 @@ class RoCBertEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -635,11 +642,6 @@ class RoCBertEncoder(nn.Module):
...
@@ -635,11 +642,6 @@ class RoCBertEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/splinter/modeling_splinter.py
View file @
934d0b8b
...
@@ -441,6 +441,13 @@ class SplinterEncoder(nn.Module):
...
@@ -441,6 +441,13 @@ class SplinterEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -450,11 +457,6 @@ class SplinterEncoder(nn.Module):
...
@@ -450,11 +457,6 @@ class SplinterEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py
View file @
934d0b8b
...
@@ -493,6 +493,13 @@ class XLMRobertaEncoder(nn.Module):
...
@@ -493,6 +493,13 @@ class XLMRobertaEncoder(nn.Module):
all_self_attentions
=
()
if
output_attentions
else
None
all_self_attentions
=
()
if
output_attentions
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
all_cross_attentions
=
()
if
output_attentions
and
self
.
config
.
add_cross_attention
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
next_decoder_cache
=
()
if
use_cache
else
None
next_decoder_cache
=
()
if
use_cache
else
None
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
for
i
,
layer_module
in
enumerate
(
self
.
layer
):
if
output_hidden_states
:
if
output_hidden_states
:
...
@@ -502,11 +509,6 @@ class XLMRobertaEncoder(nn.Module):
...
@@ -502,11 +509,6 @@ class XLMRobertaEncoder(nn.Module):
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
past_key_value
=
past_key_values
[
i
]
if
past_key_values
is
not
None
else
None
if
self
.
gradient_checkpointing
and
self
.
training
:
if
self
.
gradient_checkpointing
and
self
.
training
:
if
use_cache
:
logger
.
warning_once
(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache
=
False
def
create_custom_forward
(
module
):
def
create_custom_forward
(
module
):
def
custom_forward
(
*
inputs
):
def
custom_forward
(
*
inputs
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment