Unverified Commit 77abd1e7 authored by Lysandre Debut, committed by GitHub

Centralize logging (#6434)



* Logging

* Style

* hf_logging > utils.logging

* Address @thomwolf's comments

* Update test

* Update src/transformers/benchmark/benchmark_utils.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Revert bad change
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 461ae868
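
Every file touched below makes the same two-line substitution: the stdlib `import logging` becomes `from .utils import logging`, and `logger = logging.getLogger(__name__)` becomes `logger = logging.get_logger(__name__)`. A minimal sketch of the kind of centralized helper that makes this work, assuming a module like src/transformers/utils/logging.py that lazily attaches one shared handler (an illustration of the pattern, not the PR's exact implementation):

import logging
import threading
from typing import Optional

_lock = threading.Lock()
_default_handler: Optional[logging.Handler] = None


def _get_library_root_logger() -> logging.Logger:
    # All "transformers.*" module loggers inherit from this root,
    # so one handler and one level apply to the whole library.
    return logging.getLogger("transformers")


def _configure_library_root_logger() -> None:
    # Attach a single StreamHandler exactly once, lazily.
    global _default_handler
    with _lock:
        if _default_handler is not None:
            return
        _default_handler = logging.StreamHandler()
        root = _get_library_root_logger()
        root.addHandler(_default_handler)
        root.setLevel(logging.WARNING)
        root.propagate = False


def get_logger(name: Optional[str] = None) -> logging.Logger:
    # Drop-in replacement for logging.getLogger(__name__), as used
    # throughout the diff below.
    _configure_library_root_logger()
    return logging.getLogger(name or "transformers")
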
@@ -16,15 +16,15 @@
 import collections
-import logging
 from typing import List, Optional, Union

 from .file_utils import add_end_docstrings, add_start_docstrings
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
 from .tokenization_utils_base import BatchEncoding, TensorType
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

@@ -15,15 +15,15 @@
 """Tokenization classes for Flaubert, based on XLM."""

-import logging
 import unicodedata

 import six

 from .tokenization_xlm import XLMTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 from functools import lru_cache

@@ -26,9 +25,10 @@ from tokenizers import ByteLevelBPETokenizer
 from .tokenization_utils import AddedToken, PreTrainedTokenizer
 from .tokenization_utils_base import BatchEncoding
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -13,12 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
-
 from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 # vocab and merges same as roberta

@@ -13,16 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 from typing import List, Optional

 from .file_utils import add_start_docstrings_to_callable
 from .tokenization_utils import BatchEncoding
 from .tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING
 from .tokenization_xlm_roberta import XLMRobertaTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 _all_mbart_models = ["facebook/mbart-large-en-ro", "facebook/mbart-large-cc25"]
 SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model"

@@ -13,13 +13,11 @@
 # limitations under the License.
 """Tokenization classes for MobileBERT."""

-import logging
-
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 import re

@@ -25,9 +24,10 @@ from tokenizers import CharBPETokenizer
 from .tokenization_bert import BasicTokenizer
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -15,14 +15,14 @@
 """ Tokenization class for model Reformer."""

-import logging
 import os
 from shutil import copyfile

 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 SPIECE_UNDERLINE = "▁"

@@ -14,13 +14,11 @@
 # limitations under the License.
 """Tokenization classes for RetriBERT."""

-import logging
-
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

@@ -15,16 +15,16 @@
 """Tokenization classes for RoBERTa."""

-import logging
 from typing import List, Optional

 from tokenizers.processors import RobertaProcessing

 from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
 from .tokenization_utils import AddedToken
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -15,7 +15,6 @@
 """ Tokenization class for model T5."""

-import logging
 import os
 import re
 import warnings

@@ -23,9 +22,10 @@ from shutil import copyfile
 from typing import List, Optional

 from .tokenization_utils import BatchEncoding, PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 SPIECE_UNDERLINE = "▁"

@@ -19,7 +19,6 @@
 import glob
-import logging
 import os
 import pickle
 import re

@@ -38,13 +37,14 @@ from tokenizers.processors import BertProcessing
 from .file_utils import cached_path, is_torch_available
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging

 if is_torch_available():
     import torch

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"pretrained_vocab_file": "vocab.bin", "vocab_file": "vocab.txt"}
 VOCAB_FILES_NAMES_FAST = {"pretrained_vocab_file": "vocab.json", "vocab_file": "vocab.json"}

@@ -17,7 +17,6 @@
 """
 import itertools
-import logging
 import re
 import unicodedata
 from typing import Any, Dict, List, Optional, Tuple, Union, overload

@@ -40,9 +39,10 @@ from .tokenization_utils_base import (
     TextInputPair,
     TruncationStrategy,
 )
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 def _is_whitespace(char):

@@ -20,7 +20,6 @@
 import copy
 import json
-import logging
 import os
 import warnings
 from collections import OrderedDict, UserDict

@@ -41,6 +40,7 @@ from .file_utils import (
     is_torch_available,
     torch_required,
 )
+from .utils import logging

 if is_tf_available():

@@ -49,7 +49,7 @@ if is_torch_available():
     import torch

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VERY_LARGE_INTEGER = int(1e30)  # This is used to set the max input length for a model with infinite size input
 LARGE_INTEGER = int(1e20)  # This is used when we need something big but slightly smaller than VERY_LARGE_INTEGER

@@ -16,7 +16,6 @@
 For slow (python) tokenizers see tokenization_utils.py
 """
-import logging
 import os
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Tuple, Union

@@ -38,9 +37,10 @@ from .tokenization_utils_base import (
     TextInputPair,
     TruncationStrategy,
 )
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 @add_end_docstrings(

@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 import re
 import sys

@@ -26,9 +25,10 @@ from typing import List, Optional
 import sacremoses as sm

 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -15,16 +15,16 @@
 """ Tokenization classes for XLM-RoBERTa model."""

-import logging
 import os
 from shutil import copyfile
 from typing import List, Optional

 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_xlnet import SPIECE_UNDERLINE
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"}

@@ -15,16 +15,16 @@
 """ Tokenization classes for XLNet model."""

-import logging
 import os
 import unicodedata
 from shutil import copyfile
 from typing import List, Optional

 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}

 import inspect
-import logging
 import math
 import os
 import re

@@ -44,6 +43,7 @@ from .trainer_utils import (
     set_seed,
 )
 from .training_args import TrainingArguments
+from .utils import logging

 _use_native_amp = False

@@ -86,7 +86,7 @@ if is_optuna_available():
 if is_ray_available():
     from ray import tune

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 @contextmanager

"""Tensorflow trainer class."""
import datetime
import logging
import math
import os
import warnings
......@@ -16,6 +15,7 @@ from .modeling_tf_utils import TFPreTrainedModel
from .optimization_tf import GradientAccumulator, create_optimizer
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
from .training_args_tf import TFTrainingArguments
from .utils import logging
if is_wandb_available():
......@@ -24,7 +24,7 @@ if is_wandb_available():
if is_comet_available():
import comet_ml
logger = logging.getLogger(__name__)
logger = logging.get_logger(__name__)
class TFTrainer:
......
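With the logger centralized, verbosity for the whole library can be tuned with one call instead of configuring each module's logger. A hypothetical usage sketch, assuming the module exposes a set_verbosity_info() helper (present in current transformers.utils.logging, though the exact surface in this PR may differ):

from transformers.utils import logging

logging.set_verbosity_info()           # one call raises verbosity library-wide
logger = logging.get_logger(__name__)  # same pattern as the modules in this diff
logger.info("Tokenizer loaded")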