Commit a75c64d8 authored by Lysandre

Black 20 release

parent e78c1103
@@ -90,7 +90,7 @@ class TokenClassificationTask:
         sequence_a_segment_id=0,
         mask_padding_with_zero=True,
     ) -> List[InputFeatures]:
-        """ Loads a data file into a list of `InputFeatures`
+        """Loads a data file into a list of `InputFeatures`
         `cls_token_at_end` define the location of the CLS token:
             - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
             - True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
@@ -230,7 +230,8 @@ if is_torch_available():
         ):
             # Load data features from cache or dataset file
             cached_features_file = os.path.join(
-                data_dir, "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
+                data_dir,
+                "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
             )
             # Make sure only the first process in distributed training processes the dataset,
...
@@ -14,7 +14,7 @@ def swish(x):
 def _gelu_python(x):
-    """ Original Implementation of the gelu activation function in Google Bert repo when initially created.
+    """Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     This is now written in C in torch.nn.functional
@@ -24,7 +24,7 @@ def _gelu_python(x):
 def gelu_new(x):
-    """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
+    """Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
     Also see https://arxiv.org/abs/1606.08415
     """
     return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
...
@@ -199,11 +199,17 @@ class PyTorchBenchmark(Benchmark):
             # run additional 10 times to stabilize compilation for tpu and torchscript
             logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
             timeit.repeat(
-                func, repeat=1, number=5,
+                func,
+                repeat=1,
+                number=5,
             )
 
             # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
+            runtimes = timeit.repeat(
+                func,
+                repeat=self.args.repeat,
+                number=10,
+            )
 
             if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
                 import torch_xla.debug.metrics as met
...
@@ -32,10 +32,12 @@ logger = logging.get_logger(__name__)
 @dataclass
 class TensorFlowBenchmarkArguments(BenchmarkArguments):
     tpu_name: str = field(
-        default=None, metadata={"help": "Name of TPU"},
+        default=None,
+        metadata={"help": "Name of TPU"},
     )
     device_idx: int = field(
-        default=0, metadata={"help": "CPU / GPU device index. Defaults to 0."},
+        default=0,
+        metadata={"help": "CPU / GPU device index. Defaults to 0."},
     )
     eager_mode: bool = field(default=False, metadata={"help": "Benchmark models in eager model."})
     use_xla: bool = field(
...
@@ -219,7 +219,11 @@ class TensorFlowBenchmark(Benchmark):
             timeit.repeat(func, repeat=1, number=5)
 
             # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
+            runtimes = timeit.repeat(
+                func,
+                repeat=self.args.repeat,
+                number=10,
+            )
 
             return min(runtimes) / 10.0
         except ResourceExhaustedError as e:
...
@@ -106,7 +106,7 @@ def is_memory_tracing_enabled():
 class Frame(NamedTuple):
-    """ `Frame` is a NamedTuple used to gather the current frame state.
+    """`Frame` is a NamedTuple used to gather the current frame state.
     `Frame` has the following fields:
     - 'filename' (string): Name of the file currently executed
     - 'module' (string): Name of the module currently executed
@@ -123,7 +123,7 @@ class Frame(NamedTuple):
 class UsedMemoryState(NamedTuple):
-    """ `UsedMemoryState` are named tuples with the following fields:
+    """`UsedMemoryState` are named tuples with the following fields:
     - 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
     - 'cpu_memory': CPU RSS memory state *before* executing the line
     - 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)
@@ -135,7 +135,7 @@ class UsedMemoryState(NamedTuple):
 class Memory(NamedTuple):
-    """ `Memory` NamedTuple have a single field `bytes` and
+    """`Memory` NamedTuple have a single field `bytes` and
     you can get a human readable str of the number of mega bytes by calling `__repr__`
     - `byte` (integer): number of bytes,
     """
@@ -147,7 +147,7 @@ class Memory(NamedTuple):
 class MemoryState(NamedTuple):
-    """ `MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
+    """`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
     - `frame` (`Frame`): the current frame (see above)
     - `cpu`: CPU memory consumed at during the current frame as a `Memory` named tuple
     - `gpu`: GPU memory consumed at during the current frame as a `Memory` named tuple
@@ -161,7 +161,7 @@ class MemoryState(NamedTuple):
 class MemorySummary(NamedTuple):
-    """ `MemorySummary` namedtuple otherwise with the fields:
+    """`MemorySummary` namedtuple otherwise with the fields:
     - `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
       by substracting the memory after executing each line from the memory before executing said line.
     - `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
@@ -309,7 +309,7 @@ def start_memory_tracing(
     events_to_trace: str = "line",
     gpus_to_trace: Optional[List[int]] = None,
 ) -> MemoryTrace:
-    """ Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
+    """Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
     See `./benchmark.py` for usage examples.
     Current memory consumption is returned using psutil and in particular is the RSS memory
     "Resident Set Size” (the non-swapped physical memory the process is using).
@@ -371,7 +371,7 @@ def start_memory_tracing(
     memory_trace = []
 
     def traceit(frame, event, args):
-        """ Tracing method executed before running each line in a module or sub-module
+        """Tracing method executed before running each line in a module or sub-module
         Record memory allocated in a list with debugging information
         """
         global _is_memory_tracing_enabled
@@ -456,7 +456,7 @@ def start_memory_tracing(
 def stop_memory_tracing(
     memory_trace: Optional[MemoryTrace] = None, ignore_released_memory: bool = True
 ) -> Optional[MemorySummary]:
-    """ Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
+    """Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
     Args:
     - `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary
@@ -499,15 +499,19 @@ def stop_memory_tracing(
         cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])
 
-        for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip(
-            memory_trace[:-1], memory_trace[1:]
-        ):
+        for (
+            (frame, cpu_mem, gpu_mem),
+            (next_frame, next_cpu_mem, next_gpu_mem),
+        ) in zip(memory_trace[:-1], memory_trace[1:]):
             cpu_mem_inc = next_cpu_mem - cpu_mem
             gpu_mem_inc = next_gpu_mem - gpu_mem
             cpu_gpu_mem_inc = cpu_mem_inc + gpu_mem_inc
             memory_diff_trace.append(
                 MemoryState(
-                    frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
+                    frame=frame,
+                    cpu=Memory(cpu_mem_inc),
+                    gpu=Memory(gpu_mem_inc),
+                    cpu_gpu=Memory(cpu_gpu_mem_inc),
                 )
             )
@@ -529,7 +533,10 @@ def stop_memory_tracing(
         )  # order by the total CPU + GPU memory increase
         cumulative_memory = list(
             MemoryState(
-                frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
+                frame=frame,
+                cpu=Memory(cpu_mem_inc),
+                gpu=Memory(gpu_mem_inc),
+                cpu_gpu=Memory(cpu_gpu_mem_inc),
             )
             for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
         )
@@ -544,15 +551,17 @@ def stop_memory_tracing(
         total_memory = Memory(total_memory)
 
         return MemorySummary(
-            sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory,
+            sequential=memory_diff_trace,
+            cumulative=cumulative_memory,
+            current=memory_curr_trace,
+            total=total_memory,
         )
 
     return None
 
 def bytes_to_mega_bytes(memory_amount: int) -> int:
-    """ Utility to convert a number of bytes (int) into a number of mega bytes (int)
-    """
+    """Utility to convert a number of bytes (int) into a number of mega bytes (int)"""
     return memory_amount >> 20
...
@@ -73,30 +73,99 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
 CONFIG_MAPPING = OrderedDict(
     [
-        ("retribert", RetriBertConfig,),
-        ("t5", T5Config,),
-        ("mobilebert", MobileBertConfig,),
-        ("distilbert", DistilBertConfig,),
-        ("albert", AlbertConfig,),
-        ("camembert", CamembertConfig,),
-        ("xlm-roberta", XLMRobertaConfig,),
+        (
+            "retribert",
+            RetriBertConfig,
+        ),
+        (
+            "t5",
+            T5Config,
+        ),
+        (
+            "mobilebert",
+            MobileBertConfig,
+        ),
+        (
+            "distilbert",
+            DistilBertConfig,
+        ),
+        (
+            "albert",
+            AlbertConfig,
+        ),
+        (
+            "camembert",
+            CamembertConfig,
+        ),
+        (
+            "xlm-roberta",
+            XLMRobertaConfig,
+        ),
         ("pegasus", PegasusConfig),
-        ("marian", MarianConfig,),
-        ("mbart", MBartConfig,),
-        ("bart", BartConfig,),
-        ("reformer", ReformerConfig,),
-        ("longformer", LongformerConfig,),
-        ("roberta", RobertaConfig,),
-        ("flaubert", FlaubertConfig,),
-        ("bert", BertConfig,),
-        ("openai-gpt", OpenAIGPTConfig,),
-        ("gpt2", GPT2Config,),
-        ("transfo-xl", TransfoXLConfig,),
-        ("xlnet", XLNetConfig,),
-        ("xlm", XLMConfig,),
-        ("ctrl", CTRLConfig,),
-        ("electra", ElectraConfig,),
-        ("encoder-decoder", EncoderDecoderConfig,),
+        (
+            "marian",
+            MarianConfig,
+        ),
+        (
+            "mbart",
+            MBartConfig,
+        ),
+        (
+            "bart",
+            BartConfig,
+        ),
+        (
+            "reformer",
+            ReformerConfig,
+        ),
+        (
+            "longformer",
+            LongformerConfig,
+        ),
+        (
+            "roberta",
+            RobertaConfig,
+        ),
+        (
+            "flaubert",
+            FlaubertConfig,
+        ),
+        (
+            "bert",
+            BertConfig,
+        ),
+        (
+            "openai-gpt",
+            OpenAIGPTConfig,
+        ),
+        (
+            "gpt2",
+            GPT2Config,
+        ),
+        (
+            "transfo-xl",
+            TransfoXLConfig,
+        ),
+        (
+            "xlnet",
+            XLNetConfig,
+        ),
+        (
+            "xlm",
+            XLMConfig,
+        ),
+        (
+            "ctrl",
+            CTRLConfig,
+        ),
+        (
+            "electra",
+            ElectraConfig,
+        ),
+        (
+            "encoder-decoder",
+            EncoderDecoderConfig,
+        ),
     ]
 )
...
@@ -143,8 +143,7 @@ class FlaubertConfig(XLMConfig):
     model_type = "flaubert"
 
     def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
-        """Constructs FlaubertConfig.
-        """
+        """Constructs FlaubertConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
         self.layerdrop = layerdrop
         self.pre_norm = pre_norm