"pytorch_pretrained_bert/modeling_bert.py" did not exist on "bd74632687b368f6a0c9d3a36cb5f70a393dd5ca"
Commit a75c64d8 authored by Lysandre's avatar Lysandre
Browse files

Black 20 release

parent e78c1103
......@@ -90,7 +90,7 @@ class TokenClassificationTask:
sequence_a_segment_id=0,
mask_padding_with_zero=True,
) -> List[InputFeatures]:
""" Loads a data file into a list of `InputFeatures`
"""Loads a data file into a list of `InputFeatures`
`cls_token_at_end` define the location of the CLS token:
- False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
- True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
......@@ -230,7 +230,8 @@ if is_torch_available():
):
# Load data features from cache or dataset file
cached_features_file = os.path.join(
data_dir, "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
data_dir,
"cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
)
# Make sure only the first process in distributed training processes the dataset,
......
......@@ -14,7 +14,7 @@ def swish(x):
def _gelu_python(x):
""" Original Implementation of the gelu activation function in Google Bert repo when initially created.
"""Original Implementation of the gelu activation function in Google Bert repo when initially created.
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
This is now written in C in torch.nn.functional
......@@ -24,7 +24,7 @@ def _gelu_python(x):
def gelu_new(x):
""" Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
"""Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
Also see https://arxiv.org/abs/1606.08415
"""
return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
......
......@@ -199,11 +199,17 @@ class PyTorchBenchmark(Benchmark):
# run an additional 5 times to stabilize compilation for tpu and torchscript
logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
timeit.repeat(
func, repeat=1, number=5,
func,
repeat=1,
number=5,
)
# as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
runtimes = timeit.repeat(
func,
repeat=self.args.repeat,
number=10,
)
if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
import torch_xla.debug.metrics as met
......
......@@ -32,10 +32,12 @@ logger = logging.get_logger(__name__)
@dataclass
class TensorFlowBenchmarkArguments(BenchmarkArguments):
tpu_name: str = field(
default=None, metadata={"help": "Name of TPU"},
default=None,
metadata={"help": "Name of TPU"},
)
device_idx: int = field(
default=0, metadata={"help": "CPU / GPU device index. Defaults to 0."},
default=0,
metadata={"help": "CPU / GPU device index. Defaults to 0."},
)
eager_mode: bool = field(default=False, metadata={"help": "Benchmark models in eager model."})
use_xla: bool = field(
......
......@@ -219,7 +219,11 @@ class TensorFlowBenchmark(Benchmark):
timeit.repeat(func, repeat=1, number=5)
# as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
runtimes = timeit.repeat(
func,
repeat=self.args.repeat,
number=10,
)
return min(runtimes) / 10.0
except ResourceExhaustedError as e:
......
......@@ -106,7 +106,7 @@ def is_memory_tracing_enabled():
class Frame(NamedTuple):
""" `Frame` is a NamedTuple used to gather the current frame state.
"""`Frame` is a NamedTuple used to gather the current frame state.
`Frame` has the following fields:
- 'filename' (string): Name of the file currently executed
- 'module' (string): Name of the module currently executed
......@@ -123,7 +123,7 @@ class Frame(NamedTuple):
class UsedMemoryState(NamedTuple):
""" `UsedMemoryState` are named tuples with the following fields:
"""`UsedMemoryState` are named tuples with the following fields:
- 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
- 'cpu_memory': CPU RSS memory state *before* executing the line
- 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)
......@@ -135,7 +135,7 @@ class UsedMemoryState(NamedTuple):
class Memory(NamedTuple):
""" `Memory` NamedTuple have a single field `bytes` and
"""`Memory` NamedTuple have a single field `bytes` and
you can get a human readable str of the number of mega bytes by calling `__repr__`
- `byte` (integer): number of bytes,
"""
......@@ -147,7 +147,7 @@ class Memory(NamedTuple):
class MemoryState(NamedTuple):
""" `MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
"""`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
- `frame` (`Frame`): the current frame (see above)
- `cpu`: CPU memory consumed during the current frame as a `Memory` named tuple
- `gpu`: GPU memory consumed during the current frame as a `Memory` named tuple
......@@ -161,7 +161,7 @@ class MemoryState(NamedTuple):
class MemorySummary(NamedTuple):
""" `MemorySummary` namedtuple otherwise with the fields:
"""`MemorySummary` namedtuple otherwise with the fields:
- `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
by subtracting the memory before executing each line from the memory after executing said line.
- `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
......@@ -309,7 +309,7 @@ def start_memory_tracing(
events_to_trace: str = "line",
gpus_to_trace: Optional[List[int]] = None,
) -> MemoryTrace:
""" Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
"""Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
See `./benchmark.py` for usage examples.
Current memory consumption is returned using psutil and in particular is the RSS memory
"Resident Set Size" (the non-swapped physical memory the process is using).
......@@ -371,7 +371,7 @@ def start_memory_tracing(
memory_trace = []
def traceit(frame, event, args):
""" Tracing method executed before running each line in a module or sub-module
"""Tracing method executed before running each line in a module or sub-module
Record memory allocated in a list with debugging information
"""
global _is_memory_tracing_enabled
......@@ -456,7 +456,7 @@ def start_memory_tracing(
def stop_memory_tracing(
memory_trace: Optional[MemoryTrace] = None, ignore_released_memory: bool = True
) -> Optional[MemorySummary]:
""" Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
"""Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
Args:
- `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary
......@@ -499,15 +499,19 @@ def stop_memory_tracing(
cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])
for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip(
memory_trace[:-1], memory_trace[1:]
):
for (
(frame, cpu_mem, gpu_mem),
(next_frame, next_cpu_mem, next_gpu_mem),
) in zip(memory_trace[:-1], memory_trace[1:]):
cpu_mem_inc = next_cpu_mem - cpu_mem
gpu_mem_inc = next_gpu_mem - gpu_mem
cpu_gpu_mem_inc = cpu_mem_inc + gpu_mem_inc
memory_diff_trace.append(
MemoryState(
frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
frame=frame,
cpu=Memory(cpu_mem_inc),
gpu=Memory(gpu_mem_inc),
cpu_gpu=Memory(cpu_gpu_mem_inc),
)
)
......@@ -529,7 +533,10 @@ def stop_memory_tracing(
) # order by the total CPU + GPU memory increase
cumulative_memory = list(
MemoryState(
frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
frame=frame,
cpu=Memory(cpu_mem_inc),
gpu=Memory(gpu_mem_inc),
cpu_gpu=Memory(cpu_gpu_mem_inc),
)
for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
)
......@@ -544,15 +551,17 @@ def stop_memory_tracing(
total_memory = Memory(total_memory)
return MemorySummary(
sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory,
sequential=memory_diff_trace,
cumulative=cumulative_memory,
current=memory_curr_trace,
total=total_memory,
)
return None
def bytes_to_mega_bytes(memory_amount: int) -> int:
""" Utility to convert a number of bytes (int) into a number of mega bytes (int)
"""
"""Utility to convert a number of bytes (int) into a number of mega bytes (int)"""
return memory_amount >> 20
......
......@@ -73,30 +73,99 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
CONFIG_MAPPING = OrderedDict(
[
("retribert", RetriBertConfig,),
("t5", T5Config,),
("mobilebert", MobileBertConfig,),
("distilbert", DistilBertConfig,),
("albert", AlbertConfig,),
("camembert", CamembertConfig,),
("xlm-roberta", XLMRobertaConfig,),
(
"retribert",
RetriBertConfig,
),
(
"t5",
T5Config,
),
(
"mobilebert",
MobileBertConfig,
),
(
"distilbert",
DistilBertConfig,
),
(
"albert",
AlbertConfig,
),
(
"camembert",
CamembertConfig,
),
(
"xlm-roberta",
XLMRobertaConfig,
),
("pegasus", PegasusConfig),
("marian", MarianConfig,),
("mbart", MBartConfig,),
("bart", BartConfig,),
("reformer", ReformerConfig,),
("longformer", LongformerConfig,),
("roberta", RobertaConfig,),
("flaubert", FlaubertConfig,),
("bert", BertConfig,),
("openai-gpt", OpenAIGPTConfig,),
("gpt2", GPT2Config,),
("transfo-xl", TransfoXLConfig,),
("xlnet", XLNetConfig,),
("xlm", XLMConfig,),
("ctrl", CTRLConfig,),
("electra", ElectraConfig,),
("encoder-decoder", EncoderDecoderConfig,),
(
"marian",
MarianConfig,
),
(
"mbart",
MBartConfig,
),
(
"bart",
BartConfig,
),
(
"reformer",
ReformerConfig,
),
(
"longformer",
LongformerConfig,
),
(
"roberta",
RobertaConfig,
),
(
"flaubert",
FlaubertConfig,
),
(
"bert",
BertConfig,
),
(
"openai-gpt",
OpenAIGPTConfig,
),
(
"gpt2",
GPT2Config,
),
(
"transfo-xl",
TransfoXLConfig,
),
(
"xlnet",
XLNetConfig,
),
(
"xlm",
XLMConfig,
),
(
"ctrl",
CTRLConfig,
),
(
"electra",
ElectraConfig,
),
(
"encoder-decoder",
EncoderDecoderConfig,
),
]
)
......
......@@ -143,8 +143,7 @@ class FlaubertConfig(XLMConfig):
model_type = "flaubert"
def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
"""Constructs FlaubertConfig.
"""
"""Constructs FlaubertConfig."""
super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
self.layerdrop = layerdrop
self.pre_norm = pre_norm
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment