"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "e2fc3a9c9cb8188c841a07ee59c01d1b2afd8622"
Unverified commit c7f3abc2, authored by Stas Bekman and committed by GitHub
Browse files

introduce `logger.warning_once` and use it for grad checkpointing code (#21804)

* logger.warning_once

* style
parent f95f60c8
...@@ -638,7 +638,7 @@ class AltRobertaEncoder(nn.Module): ...@@ -638,7 +638,7 @@ class AltRobertaEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -1085,7 +1085,7 @@ class BartDecoder(BartPretrainedModel): ...@@ -1085,7 +1085,7 @@ class BartDecoder(BartPretrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -585,7 +585,7 @@ class BertEncoder(nn.Module): ...@@ -585,7 +585,7 @@ class BertEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -395,7 +395,7 @@ class BertEncoder(nn.Module): ...@@ -395,7 +395,7 @@ class BertEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -1606,7 +1606,7 @@ class BigBirdEncoder(nn.Module): ...@@ -1606,7 +1606,7 @@ class BigBirdEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -2265,7 +2265,7 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel): ...@@ -2265,7 +2265,7 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -557,7 +557,7 @@ class BioGptModel(BioGptPreTrainedModel): ...@@ -557,7 +557,7 @@ class BioGptModel(BioGptPreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -1016,7 +1016,7 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel): ...@@ -1016,7 +1016,7 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -1012,7 +1012,7 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel): ...@@ -1012,7 +1012,7 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -757,7 +757,7 @@ class BloomModel(BloomPreTrainedModel): ...@@ -757,7 +757,7 @@ class BloomModel(BloomPreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -769,7 +769,7 @@ class BridgeTowerTextEncoder(nn.Module): ...@@ -769,7 +769,7 @@ class BridgeTowerTextEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -516,7 +516,7 @@ class CamembertEncoder(nn.Module): ...@@ -516,7 +516,7 @@ class CamembertEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -901,7 +901,7 @@ class ChineseCLIPTextEncoder(nn.Module): ...@@ -901,7 +901,7 @@ class ChineseCLIPTextEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -1588,7 +1588,7 @@ class ClapTextEncoder(nn.Module): ...@@ -1588,7 +1588,7 @@ class ClapTextEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -548,7 +548,7 @@ class CodeGenModel(CodeGenPreTrainedModel): ...@@ -548,7 +548,7 @@ class CodeGenModel(CodeGenPreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting " "`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting "
"`use_cache=False`..." "`use_cache=False`..."
) )
......
...@@ -502,7 +502,7 @@ class Data2VecTextEncoder(nn.Module): ...@@ -502,7 +502,7 @@ class Data2VecTextEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -609,7 +609,7 @@ class DecisionTransformerGPT2Model(DecisionTransformerGPT2PreTrainedModel): ...@@ -609,7 +609,7 @@ class DecisionTransformerGPT2Model(DecisionTransformerGPT2PreTrainedModel):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -563,7 +563,7 @@ class ElectraEncoder(nn.Module): ...@@ -563,7 +563,7 @@ class ElectraEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -498,7 +498,7 @@ class ErnieEncoder(nn.Module): ...@@ -498,7 +498,7 @@ class ErnieEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
) )
use_cache = False use_cache = False
......
...@@ -597,7 +597,7 @@ class EsmEncoder(nn.Module): ...@@ -597,7 +597,7 @@ class EsmEncoder(nn.Module):
if self.gradient_checkpointing and self.training: if self.gradient_checkpointing and self.training:
if use_cache: if use_cache:
logger.warning( logger.warning_once(
"`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting " "`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting "
"`use_cache=False`..." "`use_cache=False`..."
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment