Unverified Commit 77abd1e7 authored by Lysandre Debut, committed by GitHub

Centralize logging (#6434)



* Logging

* Style

* hf_logging > utils.logging

* Address @thomwolf's comments

* Update test

* Update src/transformers/benchmark/benchmark_utils.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Revert bad change
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 461ae868
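
Every file touched below makes the same two-line substitution: the stdlib `import logging` becomes `from .utils import logging`, and `logger = logging.getLogger(__name__)` becomes `logger = logging.get_logger(__name__)`. A minimal sketch of the kind of centralized helper that makes this work, assuming a module like src/transformers/utils/logging.py that lazily attaches one shared handler (an illustration of the pattern, not the PR's exact implementation):

import logging
import threading
from typing import Optional

_lock = threading.Lock()
_default_handler: Optional[logging.Handler] = None


def _get_library_root_logger() -> logging.Logger:
    # All "transformers.*" module loggers inherit from this root,
    # so one handler and one level apply to the whole library.
    return logging.getLogger("transformers")


def _configure_library_root_logger() -> None:
    # Attach a single StreamHandler exactly once, lazily.
    global _default_handler
    with _lock:
        if _default_handler is not None:
            return
        _default_handler = logging.StreamHandler()
        root = _get_library_root_logger()
        root.addHandler(_default_handler)
        root.setLevel(logging.WARNING)
        root.propagate = False


def get_logger(name: Optional[str] = None) -> logging.Logger:
    # Drop-in replacement for logging.getLogger(__name__), as used
    # throughout the diff below.
    _configure_library_root_logger()
    return logging.getLogger(name or "transformers")
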
@@ -16,15 +16,15 @@
 import collections
-import logging
 from typing import List, Optional, Union

 from .file_utils import add_end_docstrings, add_start_docstrings
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
 from .tokenization_utils_base import BatchEncoding, TensorType
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

@@ -15,15 +15,15 @@
 """Tokenization classes for Flaubert, based on XLM."""

-import logging
 import unicodedata

 import six

 from .tokenization_xlm import XLMTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 from functools import lru_cache

@@ -26,9 +25,10 @@ from tokenizers import ByteLevelBPETokenizer
 from .tokenization_utils import AddedToken, PreTrainedTokenizer
 from .tokenization_utils_base import BatchEncoding
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -13,12 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
-
 from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 # vocab and merges same as roberta

@@ -13,16 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 from typing import List, Optional

 from .file_utils import add_start_docstrings_to_callable
 from .tokenization_utils import BatchEncoding
 from .tokenization_utils_base import PREPARE_SEQ2SEQ_BATCH_DOCSTRING
 from .tokenization_xlm_roberta import XLMRobertaTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 _all_mbart_models = ["facebook/mbart-large-en-ro", "facebook/mbart-large-cc25"]
 SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model"

@@ -13,13 +13,11 @@
 # limitations under the License.
 """Tokenization classes for MobileBERT."""

-import logging
-
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 import re

@@ -25,9 +24,10 @@ from tokenizers import CharBPETokenizer
 from .tokenization_bert import BasicTokenizer
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -15,14 +15,14 @@
 """ Tokenization class for model Reformer."""

-import logging
 import os
 from shutil import copyfile

 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 SPIECE_UNDERLINE = "▁"

@@ -14,13 +14,11 @@
 # limitations under the License.
 """Tokenization classes for RetriBERT."""

-import logging
-
 from .tokenization_bert import BertTokenizer, BertTokenizerFast
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

@@ -15,16 +15,16 @@
 """Tokenization classes for RoBERTa."""

-import logging
 from typing import List, Optional

 from tokenizers.processors import RobertaProcessing

 from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
 from .tokenization_utils import AddedToken
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -15,7 +15,6 @@
 """ Tokenization class for model T5."""

-import logging
 import os
 import re
 import warnings

@@ -23,9 +22,10 @@ from shutil import copyfile
 from typing import List, Optional

 from .tokenization_utils import BatchEncoding, PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 SPIECE_UNDERLINE = "▁"

@@ -19,7 +19,6 @@
 import glob
-import logging
 import os
 import pickle
 import re

@@ -38,13 +37,14 @@ from tokenizers.processors import BertProcessing
 from .file_utils import cached_path, is_torch_available
 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_utils_fast import PreTrainedTokenizerFast
+from .utils import logging

 if is_torch_available():
     import torch

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"pretrained_vocab_file": "vocab.bin", "vocab_file": "vocab.txt"}
 VOCAB_FILES_NAMES_FAST = {"pretrained_vocab_file": "vocab.json", "vocab_file": "vocab.json"}

@@ -17,7 +17,6 @@
 """
 import itertools
-import logging
 import re
 import unicodedata
 from typing import Any, Dict, List, Optional, Tuple, Union, overload

@@ -40,9 +39,10 @@ from .tokenization_utils_base import (
     TextInputPair,
     TruncationStrategy,
 )
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 def _is_whitespace(char):

@@ -20,7 +20,6 @@
 import copy
 import json
-import logging
 import os
 import warnings
 from collections import OrderedDict, UserDict

@@ -41,6 +40,7 @@ from .file_utils import (
     is_torch_available,
     torch_required,
 )
+from .utils import logging

 if is_tf_available():

@@ -49,7 +49,7 @@ if is_torch_available():
     import torch

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VERY_LARGE_INTEGER = int(1e30)  # This is used to set the max input length for a model with infinite size input
 LARGE_INTEGER = int(1e20)  # This is used when we need something big but slightly smaller than VERY_LARGE_INTEGER

@@ -16,7 +16,6 @@
 For slow (python) tokenizers see tokenization_utils.py
 """
-import logging
 import os
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Tuple, Union

@@ -38,9 +37,10 @@ from .tokenization_utils_base import (
     TextInputPair,
     TruncationStrategy,
 )
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 @add_end_docstrings(

@@ -16,7 +16,6 @@
 import json
-import logging
 import os
 import re
 import sys

@@ -26,9 +25,10 @@ from typing import List, Optional
 import sacremoses as sm

 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",

@@ -15,16 +15,16 @@
 """ Tokenization classes for XLM-RoBERTa model."""

-import logging
 import os
 from shutil import copyfile
 from typing import List, Optional

 from .tokenization_utils import PreTrainedTokenizer
 from .tokenization_xlnet import SPIECE_UNDERLINE
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"}

@@ -15,16 +15,16 @@
 """ Tokenization classes for XLNet model."""

-import logging
 import os
 import unicodedata
 from shutil import copyfile
 from typing import List, Optional

 from .tokenization_utils import PreTrainedTokenizer
+from .utils import logging

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}

 import inspect
-import logging
 import math
 import os
 import re

@@ -44,6 +43,7 @@ from .trainer_utils import (
     set_seed,
 )
 from .training_args import TrainingArguments
+from .utils import logging

 _use_native_amp = False

@@ -86,7 +86,7 @@ if is_optuna_available():
 if is_ray_available():
     from ray import tune

-logger = logging.getLogger(__name__)
+logger = logging.get_logger(__name__)

 @contextmanager

"""Tensorflow trainer class."""
import datetime
import logging
import math
import os
import warnings
......@@ -16,6 +15,7 @@ from .modeling_tf_utils import TFPreTrainedModel
from .optimization_tf import GradientAccumulator, create_optimizer
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
from .training_args_tf import TFTrainingArguments
from .utils import logging
if is_wandb_available():
......@@ -24,7 +24,7 @@ if is_wandb_available():
if is_comet_available():
import comet_ml
logger = logging.getLogger(__name__)
logger = logging.get_logger(__name__)
class TFTrainer:
......
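With the logger centralized, verbosity for the whole library can be tuned with one call instead of configuring each module's logger. A hypothetical usage sketch, assuming the module exposes a set_verbosity_info() helper (present in current transformers.utils.logging, though the exact surface in this PR may differ):

from transformers.utils import logging

logging.set_verbosity_info()           # one call raises verbosity library-wide
logger = logging.get_logger(__name__)  # same pattern as the modules in this diff
logger.info("Tokenizer loaded")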