Unverified commit 77abd1e7 authored by Lysandre Debut, committed by GitHub

Centralize logging (#6434)



* Logging

* Style

* hf_logging > utils.logging

* Address @thomwolf's comments

* Update test

* Update src/transformers/benchmark/benchmark_utils.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Revert bad change
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 461ae868
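The change below is mechanical and repeated across every touched file: the stdlib logging import is replaced by the library's centralized transformers.utils.logging module, and loggers are created through its get_logger helper. A minimal sketch of the resulting pattern, using only names visible in the diff (the INFO message is illustrative):

    # Inside a transformers module: obtain the logger from the
    # centralized utility instead of the stdlib logging module.
    from .utils import logging               # was: import logging

    logger = logging.get_logger(__name__)    # was: logging.getLogger(__name__)
    logger.info("loading vocabulary file")   # illustrative call; existing call sites keep working

Because get_logger preserves the familiar per-module logger name, every other call site (logger.info, logger.warning, ...) is untouched; each hunk therefore only drops the stdlib import, adds the centralized one, and rewrites the logger assignment.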
@@ -16,15 +16,15 @@
 import collections
-import logging
 from typing import List, Optional, Union
 from .file_utils import add_end_docstrings, add_start_docstrings
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
 from .tokenization_utils_base import BatchEncoding, TensorType
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
...
@@ -15,15 +15,15 @@
 """Tokenization classes for Flaubert, based on XLM."""
-import logging
 import unicodedata
 import six
 from .tokenization_xlm import XLMTokenizer
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
...
@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 from functools import lru_cache
@@ -26,9 +25,10 @@ from tokenizers import ByteLevelBPETokenizer
 from .tokenization_utils import AddedToken, PreTrainedTokenizer
 from .tokenization_utils_base import BatchEncoding
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
...
@@ -13,12 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import logging
 from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 # vocab and merges same as roberta
...
@@ -13,16 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import logging
 from typing import List, Optional
 from .file_utils import add_start_docstrings_to_callable
 from .tokenization_utils import BatchEncoding
 from .tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING
 from .tokenization_xlm_roberta import XLMRobertaTokenizer
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 _all_mbart_models = ["facebook/mbart-large-en-ro", "facebook/mbart-large-cc25"]
 SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model"
...
@@ -13,13 +13,11 @@
 # limitations under the License.
 """Tokenization classes for MobileBERT."""
-import logging
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
...
@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 import re
@@ -25,9 +24,10 @@ from tokenizers import CharBPETokenizer
 from .tokenization_bert import BasicTokenizer
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
...
@@ -15,14 +15,14 @@
 """ Tokenization class for model Reformer."""
-import logging
 import os
 from shutil import copyfile
 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 SPIECE_UNDERLINE = "▁"
...
@@ -14,13 +14,11 @@
 # limitations under the License.
 """Tokenization classes for RetriBERT."""
-import logging
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
...
@@ -15,16 +15,16 @@
 """Tokenization classes for RoBERTa."""
-import logging
 from typing import List, Optional
 from tokenizers.processors import RobertaProcessing
 from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
 from .tokenization_utils import AddedToken
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
...
@@ -15,7 +15,6 @@
 """ Tokenization class for model T5."""
-import logging
 import os
 import re
 import warnings
@@ -23,9 +22,10 @@ from shutil import copyfile
 from typing import List, Optional
 from .tokenization_utils import BatchEncoding, PreTrainedTokenizer
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 SPIECE_UNDERLINE = "▁"
...
@@ -19,7 +19,6 @@
 import glob
-import logging
 import os
 import pickle
 import re
@@ -38,13 +37,14 @@ from tokenizers.processors import BertProcessing
 from .file_utils import cached_path, is_torch_available
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging
 if is_torch_available():
     import torch
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"pretrained_vocab_file": "vocab.bin", "vocab_file": "vocab.txt"}
 VOCAB_FILES_NAMES_FAST = {"pretrained_vocab_file": "vocab.json", "vocab_file": "vocab.json"}
...
@@ -17,7 +17,6 @@
 """
 import itertools
-import logging
 import re
 import unicodedata
 from typing import Any, Dict, List, Optional, Tuple, Union, overload
@@ -40,9 +39,10 @@ from .tokenization_utils_base import (
     TextInputPair,
     TruncationStrategy,
 )
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 def _is_whitespace(char):
...
@@ -20,7 +20,6 @@
 import copy
 import json
-import logging
 import os
 import warnings
 from collections import OrderedDict, UserDict
@@ -41,6 +40,7 @@ from .file_utils import (
     is_torch_available,
     torch_required,
 )
+from .utils import logging
 if is_tf_available():
@@ -49,7 +49,7 @@ if is_torch_available():
     import torch
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VERY_LARGE_INTEGER = int(1e30)  # This is used to set the max input length for a model with infinite size input
 LARGE_INTEGER = int(1e20)  # This is used when we need something big but slightly smaller than VERY_LARGE_INTEGER
...
@@ -16,7 +16,6 @@
 For slow (python) tokenizers see tokenization_utils.py
 """
-import logging
 import os
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Tuple, Union
@@ -38,9 +37,10 @@ from .tokenization_utils_base import (
     TextInputPair,
     TruncationStrategy,
 )
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 @add_end_docstrings(
...
@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 import re
 import sys
@@ -26,9 +25,10 @@ from typing import List, Optional
 import sacremoses as sm
 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
...
@@ -15,16 +15,16 @@
 """ Tokenization classes for XLM-RoBERTa model."""
-import logging
 import os
 from shutil import copyfile
 from typing import List, Optional
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_xlnet import SPIECE_UNDERLINE
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"}
...
@@ -15,16 +15,16 @@
 """ Tokenization classes for XLNet model."""
-import logging
 import os
 import unicodedata
 from shutil import copyfile
 from typing import List, Optional
 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}
...
 import inspect
-import logging
 import math
 import os
 import re
@@ -44,6 +43,7 @@ from .trainer_utils import (
     set_seed,
 )
 from .training_args import TrainingArguments
+from .utils import logging
 _use_native_amp = False
@@ -86,7 +86,7 @@ if is_optuna_available():
 if is_ray_available():
     from ray import tune
-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)
 @contextmanager
...
"""Tensorflow trainer class.""" """Tensorflow trainer class."""
import datetime import datetime
import logging
import math import math
import os import os
import warnings import warnings
...@@ -16,6 +15,7 @@ from .modeling_tf_utils import TFPreTrainedModel ...@@ -16,6 +15,7 @@ from .modeling_tf_utils import TFPreTrainedModel
from .optimization_tf import GradientAccumulator, create_optimizer from .optimization_tf import GradientAccumulator, create_optimizer
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
from .training_args_tf import TFTrainingArguments from .training_args_tf import TFTrainingArguments
from .utils import logging
if is_wandb_available(): if is_wandb_available():
...@@ -24,7 +24,7 @@ if is_wandb_available(): ...@@ -24,7 +24,7 @@ if is_wandb_available():
if is_comet_available(): if is_comet_available():
import comet_ml import comet_ml
logger = logging.getLogger(__name__) logger = logging.get_logger(__name__)
class TFTrainer: class TFTrainer:
......
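A practical payoff of the centralization: downstream users can control verbosity for the whole library from one place instead of configuring each module's stdlib logger by name. A hedged caller-side sketch (it assumes the new transformers.utils.logging module exposes set_verbosity_info alongside get_logger, as this refactor is understood to introduce; verify against your installed version):

    from transformers.utils import logging

    # Assumed helper: raises all transformers.* loggers to INFO in one call.
    logging.set_verbosity_info()

    # Same helper the library modules use internally; any module's logger
    # can be fetched by its dotted name.
    logger = logging.get_logger("transformers.tokenization_utils")
    logger.info("visible once verbosity is INFO or lower")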