chenpangpang / transformers · Commits · c7f3abc2

Unverified commit c7f3abc2, authored Feb 27, 2023 by Stas Bekman, committed by GitHub on Feb 27, 2023.
Parent: f95f60c8

introduce `logger.warning_once` and use it for grad checkpointing code (#21804)

* logger.warning_once
* style

Changes: 58 files in total; this page shows 18 changed files with 34 additions and 17 deletions (+34 -17).
src/transformers/models/qdqbert/modeling_qdqbert.py (+1 -1)
src/transformers/models/realm/modeling_realm.py (+1 -1)
src/transformers/models/rembert/modeling_rembert.py (+1 -1)
src/transformers/models/roberta/modeling_roberta.py (+1 -1)
src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py (+1 -1)
src/transformers/models/roc_bert/modeling_roc_bert.py (+1 -1)
src/transformers/models/roformer/modeling_roformer.py (+1 -1)
src/transformers/models/speecht5/modeling_speecht5.py (+1 -1)
src/transformers/models/splinter/modeling_splinter.py (+1 -1)
src/transformers/models/switch_transformers/modeling_switch_transformers.py (+1 -1)
src/transformers/models/t5/modeling_t5.py (+1 -1)
src/transformers/models/time_series_transformer/modeling_time_series_transformer.py (+1 -1)
src/transformers/models/trajectory_transformer/modeling_trajectory_transformer.py (+1 -1)
src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py (+1 -1)
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py (+1 -1)
src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py (+1 -1)
src/transformers/models/xmod/modeling_xmod.py (+1 -1)
src/transformers/utils/logging.py (+17 -0)
src/transformers/models/qdqbert/modeling_qdqbert.py

@@ -575,7 +575,7 @@ class QDQBertEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
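Every one of the model-file hunks in this commit is the same one-line change, and it sits on a hot path: the guard re-runs on every forward pass during checkpointed training, so a plain logger.warning() repeats the identical message once per step. The sketch below is illustrative only, not part of the commit (the standalone forward() shim and logger name are stand-ins), and reproduces the log noise being fixed:

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("transformers")

def forward(gradient_checkpointing: bool, training: bool, use_cache: bool) -> bool:
    # Same guard as the hunk above: gradient checkpointing recomputes
    # activations during backward, so cached key/value states cannot be kept.
    if gradient_checkpointing and training:
        if use_cache:
            logger.warning(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
            )
            use_cache = False
    return use_cache

for step in range(3):
    # Three training steps -> three identical warnings with plain warning();
    # after this commit, warning_once() collapses them to a single line.
    forward(gradient_checkpointing=True, training=True, use_cache=True)

The same pattern repeats in each model file below, which is why the commit sweeps a one-line warning -> warning_once change across all of them.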
src/transformers/models/realm/modeling_realm.py

@@ -578,7 +578,7 @@ class RealmEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/rembert/modeling_rembert.py

@@ -536,7 +536,7 @@ class RemBertEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/roberta/modeling_roberta.py

@@ -502,7 +502,7 @@ class RobertaEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py

@@ -504,7 +504,7 @@ class RobertaPreLayerNormEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/roc_bert/modeling_roc_bert.py

@@ -636,7 +636,7 @@ class RoCBertEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/roformer/modeling_roformer.py

@@ -573,7 +573,7 @@ class RoFormerEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/speecht5/modeling_speecht5.py

@@ -1692,7 +1692,7 @@ class SpeechT5Decoder(SpeechT5PreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/splinter/modeling_splinter.py

@@ -451,7 +451,7 @@ class SplinterEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/switch_transformers/modeling_switch_transformers.py

@@ -1057,7 +1057,7 @@ class SwitchTransformersStack(SwitchTransformersPreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/t5/modeling_t5.py

@@ -1037,7 +1037,7 @@ class T5Stack(T5PreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/time_series_transformer/modeling_time_series_transformer.py

@@ -1471,7 +1471,7 @@ class TimeSeriesTransformerDecoder(TimeSeriesTransformerPreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/trajectory_transformer/modeling_trajectory_transformer.py

@@ -543,7 +543,7 @@ class TrajectoryTransformerModel(TrajectoryTransformerPreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/xlm_prophetnet/modeling_xlm_prophetnet.py

@@ -1595,7 +1595,7 @@ class XLMProphetNetDecoder(XLMProphetNetPreTrainedModel):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/xlm_roberta/modeling_xlm_roberta.py

@@ -503,7 +503,7 @@ class XLMRobertaEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py

@@ -492,7 +492,7 @@ class XLMRobertaXLEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/models/xmod/modeling_xmod.py

@@ -566,7 +566,7 @@ class XmodEncoder(nn.Module):
         if self.gradient_checkpointing and self.training:
             if use_cache:
-                logger.warning(
+                logger.warning_once(
                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                 )
                 use_cache = False
src/transformers/utils/logging.py

@@ -14,6 +14,8 @@
 # limitations under the License.
 """ Logging utilities."""

+import functools
 import logging
 import os
 import sys

@@ -281,6 +283,21 @@ def warning_advice(self, *args, **kwargs):
 logging.Logger.warning_advice = warning_advice


+@functools.lru_cache(None)
+def warning_once(self, *args, **kwargs):
+    """
+    This method is identical to `logger.warning()`, but will emit the warning with the same message only once
+
+    Note: The cache is for the function arguments, so 2 different callers using the same arguments will hit the cache.
+    The assumption here is that all warning messages are unique across the code. If they aren't then need to switch to
+    another type of cache that includes the caller frame information in the hashing function.
+    """
+    self.warning(*args, **kwargs)
+
+
+logging.Logger.warning_once = warning_once
+
+
 class EmptyTqdm:
     """Dummy tqdm which doesn't do anything."""
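A hedged usage sketch, assuming a transformers version that includes this commit: warning_once is attached to logging.Logger, and because @functools.lru_cache(None) keys its cache on the full argument tuple (including self), a repeated call with the same logger and the same message is a cache hit and emits nothing.

from transformers.utils import logging

logger = logging.get_logger(__name__)

for step in range(1000):
    # Emitted exactly once across all 1000 iterations: later calls with
    # identical arguments hit the lru_cache and skip self.warning().
    logger.warning_once(
        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
    )

The docstring spells out the trade-off: the cache is keyed on arguments alone, so two distinct call sites emitting an identical message will suppress each other. The commit accepts this on the assumption that warning messages are unique across the codebase; otherwise the cache key would need to include caller frame information.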