Commit a75c64d8 authored by Lysandre

Black 20 release

parent e78c1103
@@ -90,7 +90,7 @@ class TokenClassificationTask:
         sequence_a_segment_id=0,
         mask_padding_with_zero=True,
     ) -> List[InputFeatures]:
-        """ Loads a data file into a list of `InputFeatures`
+        """Loads a data file into a list of `InputFeatures`
         `cls_token_at_end` define the location of the CLS token:
             - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
             - True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
@@ -230,7 +230,8 @@ if is_torch_available():
         ):
             # Load data features from cache or dataset file
             cached_features_file = os.path.join(
-                data_dir, "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
+                data_dir,
+                "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
             )
             # Make sure only the first process in distributed training processes the dataset,
...
@@ -14,7 +14,7 @@ def swish(x):
 def _gelu_python(x):
-    """ Original Implementation of the gelu activation function in Google Bert repo when initially created.
+    """Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     This is now written in C in torch.nn.functional
@@ -24,7 +24,7 @@ def _gelu_python(x):
 def gelu_new(x):
-    """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
+    """Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
     Also see https://arxiv.org/abs/1606.08415
     """
     return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
...
@@ -199,11 +199,17 @@ class PyTorchBenchmark(Benchmark):
             # run additional 10 times to stabilize compilation for tpu and torchscript
             logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
             timeit.repeat(
-                func, repeat=1, number=5,
+                func,
+                repeat=1,
+                number=5,
             )
 
             # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
+            runtimes = timeit.repeat(
+                func,
+                repeat=self.args.repeat,
+                number=10,
+            )
 
             if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
                 import torch_xla.debug.metrics as met
...
@@ -32,10 +32,12 @@ logger = logging.get_logger(__name__)
 @dataclass
 class TensorFlowBenchmarkArguments(BenchmarkArguments):
     tpu_name: str = field(
-        default=None, metadata={"help": "Name of TPU"},
+        default=None,
+        metadata={"help": "Name of TPU"},
     )
     device_idx: int = field(
-        default=0, metadata={"help": "CPU / GPU device index. Defaults to 0."},
+        default=0,
+        metadata={"help": "CPU / GPU device index. Defaults to 0."},
     )
     eager_mode: bool = field(default=False, metadata={"help": "Benchmark models in eager model."})
     use_xla: bool = field(
...
@@ -219,7 +219,11 @@ class TensorFlowBenchmark(Benchmark):
             timeit.repeat(func, repeat=1, number=5)
 
             # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
+            runtimes = timeit.repeat(
+                func,
+                repeat=self.args.repeat,
+                number=10,
+            )
 
             return min(runtimes) / 10.0
         except ResourceExhaustedError as e:
...
@@ -106,7 +106,7 @@ def is_memory_tracing_enabled():
 class Frame(NamedTuple):
-    """ `Frame` is a NamedTuple used to gather the current frame state.
+    """`Frame` is a NamedTuple used to gather the current frame state.
     `Frame` has the following fields:
     - 'filename' (string): Name of the file currently executed
     - 'module' (string): Name of the module currently executed
@@ -123,7 +123,7 @@ class Frame(NamedTuple):
 class UsedMemoryState(NamedTuple):
-    """ `UsedMemoryState` are named tuples with the following fields:
+    """`UsedMemoryState` are named tuples with the following fields:
     - 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
     - 'cpu_memory': CPU RSS memory state *before* executing the line
     - 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)
@@ -135,7 +135,7 @@ class UsedMemoryState(NamedTuple):
 class Memory(NamedTuple):
-    """ `Memory` NamedTuple have a single field `bytes` and
+    """`Memory` NamedTuple have a single field `bytes` and
     you can get a human readable str of the number of mega bytes by calling `__repr__`
     - `byte` (integer): number of bytes,
     """
@@ -147,7 +147,7 @@ class Memory(NamedTuple):
 class MemoryState(NamedTuple):
-    """ `MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
+    """`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
     - `frame` (`Frame`): the current frame (see above)
     - `cpu`: CPU memory consumed at during the current frame as a `Memory` named tuple
     - `gpu`: GPU memory consumed at during the current frame as a `Memory` named tuple
@@ -161,7 +161,7 @@ class MemoryState(NamedTuple):
 class MemorySummary(NamedTuple):
-    """ `MemorySummary` namedtuple otherwise with the fields:
+    """`MemorySummary` namedtuple otherwise with the fields:
     - `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
       by substracting the memory after executing each line from the memory before executing said line.
     - `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
@@ -309,7 +309,7 @@ def start_memory_tracing(
     events_to_trace: str = "line",
     gpus_to_trace: Optional[List[int]] = None,
 ) -> MemoryTrace:
-    """ Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
+    """Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
     See `./benchmark.py` for usage examples.
     Current memory consumption is returned using psutil and in particular is the RSS memory
     "Resident Set Size” (the non-swapped physical memory the process is using).
@@ -371,7 +371,7 @@ def start_memory_tracing(
     memory_trace = []
 
     def traceit(frame, event, args):
-        """ Tracing method executed before running each line in a module or sub-module
+        """Tracing method executed before running each line in a module or sub-module
         Record memory allocated in a list with debugging information
         """
         global _is_memory_tracing_enabled
@@ -456,7 +456,7 @@ def start_memory_tracing(
 def stop_memory_tracing(
     memory_trace: Optional[MemoryTrace] = None, ignore_released_memory: bool = True
 ) -> Optional[MemorySummary]:
-    """ Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
+    """Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
     Args:
     - `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary
@@ -499,15 +499,19 @@ def stop_memory_tracing(
         cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])
 
-        for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip(
-            memory_trace[:-1], memory_trace[1:]
-        ):
+        for (
+            (frame, cpu_mem, gpu_mem),
+            (next_frame, next_cpu_mem, next_gpu_mem),
+        ) in zip(memory_trace[:-1], memory_trace[1:]):
             cpu_mem_inc = next_cpu_mem - cpu_mem
             gpu_mem_inc = next_gpu_mem - gpu_mem
             cpu_gpu_mem_inc = cpu_mem_inc + gpu_mem_inc
             memory_diff_trace.append(
                 MemoryState(
-                    frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
+                    frame=frame,
+                    cpu=Memory(cpu_mem_inc),
+                    gpu=Memory(gpu_mem_inc),
+                    cpu_gpu=Memory(cpu_gpu_mem_inc),
                 )
             )
@@ -529,7 +533,10 @@ def stop_memory_tracing(
         )  # order by the total CPU + GPU memory increase
         cumulative_memory = list(
             MemoryState(
-                frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
+                frame=frame,
+                cpu=Memory(cpu_mem_inc),
+                gpu=Memory(gpu_mem_inc),
+                cpu_gpu=Memory(cpu_gpu_mem_inc),
             )
             for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
         )
@@ -544,15 +551,17 @@ def stop_memory_tracing(
         total_memory = Memory(total_memory)
 
         return MemorySummary(
-            sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory,
+            sequential=memory_diff_trace,
+            cumulative=cumulative_memory,
+            current=memory_curr_trace,
+            total=total_memory,
         )
 
     return None
 
 def bytes_to_mega_bytes(memory_amount: int) -> int:
-    """ Utility to convert a number of bytes (int) into a number of mega bytes (int)
-    """
+    """Utility to convert a number of bytes (int) into a number of mega bytes (int)"""
     return memory_amount >> 20
...
@@ -73,30 +73,99 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
 CONFIG_MAPPING = OrderedDict(
     [
-        ("retribert", RetriBertConfig,),
-        ("t5", T5Config,),
-        ("mobilebert", MobileBertConfig,),
-        ("distilbert", DistilBertConfig,),
-        ("albert", AlbertConfig,),
-        ("camembert", CamembertConfig,),
-        ("xlm-roberta", XLMRobertaConfig,),
+        (
+            "retribert",
+            RetriBertConfig,
+        ),
+        (
+            "t5",
+            T5Config,
+        ),
+        (
+            "mobilebert",
+            MobileBertConfig,
+        ),
+        (
+            "distilbert",
+            DistilBertConfig,
+        ),
+        (
+            "albert",
+            AlbertConfig,
+        ),
+        (
+            "camembert",
+            CamembertConfig,
+        ),
+        (
+            "xlm-roberta",
+            XLMRobertaConfig,
+        ),
         ("pegasus", PegasusConfig),
-        ("marian", MarianConfig,),
-        ("mbart", MBartConfig,),
-        ("bart", BartConfig,),
-        ("reformer", ReformerConfig,),
-        ("longformer", LongformerConfig,),
-        ("roberta", RobertaConfig,),
-        ("flaubert", FlaubertConfig,),
-        ("bert", BertConfig,),
-        ("openai-gpt", OpenAIGPTConfig,),
-        ("gpt2", GPT2Config,),
-        ("transfo-xl", TransfoXLConfig,),
-        ("xlnet", XLNetConfig,),
-        ("xlm", XLMConfig,),
-        ("ctrl", CTRLConfig,),
-        ("electra", ElectraConfig,),
-        ("encoder-decoder", EncoderDecoderConfig,),
+        (
+            "marian",
+            MarianConfig,
+        ),
+        (
+            "mbart",
+            MBartConfig,
+        ),
+        (
+            "bart",
+            BartConfig,
+        ),
+        (
+            "reformer",
+            ReformerConfig,
+        ),
+        (
+            "longformer",
+            LongformerConfig,
+        ),
+        (
+            "roberta",
+            RobertaConfig,
+        ),
+        (
+            "flaubert",
+            FlaubertConfig,
+        ),
+        (
+            "bert",
+            BertConfig,
+        ),
+        (
+            "openai-gpt",
+            OpenAIGPTConfig,
+        ),
+        (
+            "gpt2",
+            GPT2Config,
+        ),
+        (
+            "transfo-xl",
+            TransfoXLConfig,
+        ),
+        (
+            "xlnet",
+            XLNetConfig,
+        ),
+        (
+            "xlm",
+            XLMConfig,
+        ),
+        (
+            "ctrl",
+            CTRLConfig,
+        ),
+        (
+            "electra",
+            ElectraConfig,
+        ),
+        (
+            "encoder-decoder",
+            EncoderDecoderConfig,
+        ),
     ]
 )
...
@@ -143,8 +143,7 @@ class FlaubertConfig(XLMConfig):
     model_type = "flaubert"
 
     def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
-        """Constructs FlaubertConfig.
-        """
+        """Constructs FlaubertConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
         self.layerdrop = layerdrop
         self.pre_norm = pre_norm