chenpangpang / transformers · Commits

Commit a75c64d8, authored Aug 26, 2020 by Lysandre
Black 20 release
Parent: e78c1103

The commit changes 191 files in total; this page (1 of 10) shows 20 changed files with 969 additions and 879 deletions (+969 −879).
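Context: "Black 20" here is presumably black 20.8b1, whose release date matches the commit date. Two of its formatting changes account for nearly every hunk on this page: the "magic trailing comma" (a call or collection that ends in a trailing comma is exploded to one element per line) and docstring normalization (the space black previously left after an opening `"""` is stripped). A hedged, runnable sketch of the trailing-comma behavior, assuming black >= 20.8b1 is installed:

# Illustration, not part of the commit: reproduce the "magic trailing comma"
# reformatting that generated most hunks below. Assumes black >= 20.8b1.
import black

mode = black.FileMode()
print(black.format_str("f(a, b)\n", mode=mode), end="")   # no trailing comma: stays on one line
print(black.format_str("f(a, b,)\n", mode=mode), end="")  # trailing comma: one argument per line
# f(a, b)
# f(
#     a,
#     b,
# )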
Files changed on this page:

examples/token-classification/utils_ner.py           +7    −6
src/transformers/activations.py                      +7    −7
src/transformers/benchmark/benchmark.py              +8    −2
src/transformers/benchmark/benchmark_args_tf.py      +4    −2
src/transformers/benchmark/benchmark_tf.py           +5    −1
src/transformers/benchmark/benchmark_utils.py        +143  −134
src/transformers/configuration_albert.py             +65   −65
src/transformers/configuration_auto.py               +98   −29
src/transformers/configuration_bart.py               +6    −6
src/transformers/configuration_bert.py               +53   −53
src/transformers/configuration_ctrl.py               +49   −49
src/transformers/configuration_distilbert.py         +55   −55
src/transformers/configuration_dpr.py                +8    −8
src/transformers/configuration_electra.py            +76   −76
src/transformers/configuration_encoder_decoder.py    +31   −31
src/transformers/configuration_flaubert.py           +106  −107
src/transformers/configuration_gpt2.py               +78   −78
src/transformers/configuration_longformer.py         +19   −19
src/transformers/configuration_mobilebert.py         +73   −73
src/transformers/configuration_openai.py             +78   −78
examples/token-classification/utils_ner.py

@@ -90,7 +90,7 @@ class TokenClassificationTask:
         sequence_a_segment_id=0,
         mask_padding_with_zero=True,
     ) -> List[InputFeatures]:
-        """ Loads a data file into a list of `InputFeatures`
+        """Loads a data file into a list of `InputFeatures`
         `cls_token_at_end` define the location of the CLS token:
             - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
             - True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]

@@ -230,7 +230,8 @@ if is_torch_available():
         ):
             # Load data features from cache or dataset file
             cached_features_file = os.path.join(
-                data_dir, "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
+                data_dir,
+                "cached_{}_{}_{}".format(mode.value, tokenizer.__class__.__name__, str(max_seq_length)),
             )
             # Make sure only the first process in distributed training processes the dataset,
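For reference, the format call in the second hunk builds cache names like the following (values here are illustrative, not from the commit):

# Illustration only: the cache-file naming scheme from the hunk above,
# with made-up values for the mode, tokenizer class, and sequence length.
mode_value, tokenizer_class, max_seq_length = "train", "BertTokenizer", 128
print("cached_{}_{}_{}".format(mode_value, tokenizer_class, str(max_seq_length)))
# cached_train_BertTokenizer_128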
src/transformers/activations.py

@@ -14,7 +14,7 @@ def swish(x):
 def _gelu_python(x):
-    """ Original Implementation of the gelu activation function in Google Bert repo when initially created.
+    """Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     This is now written in C in torch.nn.functional

@@ -24,7 +24,7 @@ def _gelu_python(x):
 def gelu_new(x):
-    """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
+    """Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
     Also see https://arxiv.org/abs/1606.08415
     """
     return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
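Both hunks only touch docstrings, but the formula in gelu_new is worth seeing next to the exact GELU. A self-contained sketch (only the tanh formula comes from the diff; the comparison harness is illustrative):

# Sketch, not part of the commit: the tanh GELU approximation from gelu_new
# above, checked against the exact erf-based GELU (x * Phi(x)).
import math

import torch


def gelu_exact(x):
    # exact GELU: x times the standard normal CDF
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))


def gelu_tanh(x):
    # the approximation shown in the hunk above
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))


x = torch.linspace(-4.0, 4.0, steps=101)
print(torch.max(torch.abs(gelu_tanh(x) - gelu_exact(x))).item())  # small everywhere, well under 1e-2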
src/transformers/benchmark/benchmark.py

@@ -199,11 +199,17 @@ class PyTorchBenchmark(Benchmark):
                 # run additional 10 times to stabilize compilation for tpu and torchscript
                 logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
                 timeit.repeat(
-                    func, repeat=1, number=5,
+                    func,
+                    repeat=1,
+                    number=5,
                 )

             # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
+            runtimes = timeit.repeat(
+                func,
+                repeat=self.args.repeat,
+                number=10,
+            )

             if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
                 import torch_xla.debug.metrics as met
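The timing pattern above (and the comment about taking the min) works like this in isolation; func here is a stand-in for the benchmarked model call:

# Stand-alone sketch of the measurement pattern above: timeit.repeat returns
# one total per round ("repeat" rounds of "number" calls each); per the timeit
# docs the minimum round is the least noisy estimate, hence min(runtimes) / number.
import timeit


def func():
    sum(range(10_000))  # stand-in for the model's inference call


runtimes = timeit.repeat(func, repeat=3, number=10)
print(min(runtimes) / 10.0)  # best-case seconds per call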
src/transformers/benchmark/benchmark_args_tf.py

@@ -32,10 +32,12 @@ logger = logging.get_logger(__name__)
 @dataclass
 class TensorFlowBenchmarkArguments(BenchmarkArguments):
     tpu_name: str = field(
-        default=None, metadata={"help": "Name of TPU"},
+        default=None,
+        metadata={"help": "Name of TPU"},
     )
     device_idx: int = field(
-        default=0, metadata={"help": "CPU / GPU device index. Defaults to 0."},
+        default=0,
+        metadata={"help": "CPU / GPU device index. Defaults to 0."},
     )
     eager_mode: bool = field(default=False, metadata={"help": "Benchmark models in eager model."})
     use_xla: bool = field(
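The field(default=..., metadata={"help": ...}) idiom above attaches free-form metadata to each dataclass field; transformers' argument parser reads the "help" entries to build CLI flags. A minimal sketch of the mechanics (the demo class is illustrative):

# Illustration: dataclass field metadata is inert to the dataclass machinery
# itself but is retrievable via dataclasses.fields(), which is how help strings
# like the ones above can be turned into command-line options.
from dataclasses import dataclass, field, fields


@dataclass
class DemoArguments:
    tpu_name: str = field(default=None, metadata={"help": "Name of TPU"})
    device_idx: int = field(default=0, metadata={"help": "CPU / GPU device index. Defaults to 0."})


for f in fields(DemoArguments):
    print(f"--{f.name}: {f.metadata['help']}")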
src/transformers/benchmark/benchmark_tf.py

@@ -219,7 +219,11 @@ class TensorFlowBenchmark(Benchmark):
                 timeit.repeat(func, repeat=1, number=5)

             # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-            runtimes = timeit.repeat(func, repeat=self.args.repeat, number=10,)
+            runtimes = timeit.repeat(
+                func,
+                repeat=self.args.repeat,
+                number=10,
+            )

             return min(runtimes) / 10.0
         except ResourceExhaustedError as e:
src/transformers/benchmark/benchmark_utils.py

@@ -106,7 +106,7 @@ def is_memory_tracing_enabled():
 class Frame(NamedTuple):
-    """ `Frame` is a NamedTuple used to gather the current frame state.
+    """`Frame` is a NamedTuple used to gather the current frame state.
     `Frame` has the following fields:
     - 'filename' (string): Name of the file currently executed
     - 'module' (string): Name of the module currently executed

@@ -123,7 +123,7 @@ class Frame(NamedTuple):
 class UsedMemoryState(NamedTuple):
-    """ `UsedMemoryState` are named tuples with the following fields:
+    """`UsedMemoryState` are named tuples with the following fields:
     - 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
     - 'cpu_memory': CPU RSS memory state *before* executing the line
     - 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)

@@ -135,7 +135,7 @@ class UsedMemoryState(NamedTuple):
 class Memory(NamedTuple):
-    """ `Memory` NamedTuple have a single field `bytes` and
+    """`Memory` NamedTuple have a single field `bytes` and
     you can get a human readable str of the number of mega bytes by calling `__repr__`
     - `byte` (integer): number of bytes,
     """
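Putting the Memory docstring together with bytes_to_mega_bytes from the end of this file gives a runnable sketch of the described behavior (field layout taken from the docstring; the real class may differ in detail):

# Sketch assembled from the docstrings in this diff: a NamedTuple with a single
# `bytes` field whose repr prints megabytes, using the >> 20 conversion that
# bytes_to_mega_bytes (further down in this file) performs.
from typing import NamedTuple


def bytes_to_mega_bytes(memory_amount: int) -> int:
    return memory_amount >> 20  # right shift by 20 == integer division by 2**20


class Memory(NamedTuple):
    bytes: int

    def __repr__(self) -> str:
        return str(bytes_to_mega_bytes(self.bytes))


print(Memory(150 * 2**20))  # 150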
@@ -147,7 +147,7 @@ class Memory(NamedTuple):
 class MemoryState(NamedTuple):
-    """ `MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
+    """`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
     - `frame` (`Frame`): the current frame (see above)
     - `cpu`: CPU memory consumed at during the current frame as a `Memory` named tuple
     - `gpu`: GPU memory consumed at during the current frame as a `Memory` named tuple

@@ -161,7 +161,7 @@ class MemoryState(NamedTuple):
 class MemorySummary(NamedTuple):
-    """ `MemorySummary` namedtuple otherwise with the fields:
+    """`MemorySummary` namedtuple otherwise with the fields:
     - `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
       by substracting the memory after executing each line from the memory before executing said line.
     - `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line

@@ -309,7 +309,7 @@ def start_memory_tracing(
     events_to_trace: str = "line",
     gpus_to_trace: Optional[List[int]] = None,
 ) -> MemoryTrace:
-    """ Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
+    """Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
     See `./benchmark.py` for usage examples.
     Current memory consumption is returned using psutil and in particular is the RSS memory
     "Resident Set Size” (the non-swapped physical memory the process is using).

@@ -371,7 +371,7 @@ def start_memory_tracing(
     memory_trace = []

     def traceit(frame, event, args):
-        """ Tracing method executed before running each line in a module or sub-module
+        """Tracing method executed before running each line in a module or sub-module
         Record memory allocated in a list with debugging information
         """
         global _is_memory_tracing_enabled
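traceit above is the heart of the tracer: sys.settrace invokes it on every "line" event, and it records memory before the line runs. A stripped-down sketch of that mechanism (no module filtering or GPU readings, unlike the real function):

# Simplified sketch of the mechanism behind start_memory_tracing/traceit:
# sys.settrace calls the callback before each traced line executes; here we
# record (line number, CPU RSS) pairs, roughly what UsedMemoryState stores.
import sys

import psutil

memory_trace = []


def traceit(frame, event, args):
    if event == "line":
        cpu_rss = psutil.Process().memory_info().rss  # RSS, as in the docstring above
        memory_trace.append((frame.f_lineno, cpu_rss))
    return traceit  # keep tracing inside this frame


def allocate():
    data = [0] * 100_000
    more = [1] * 100_000
    return data, more


sys.settrace(traceit)
allocate()
sys.settrace(None)
print(memory_trace[:3])  # one (line, rss) record per executed line of allocate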
@@ -456,7 +456,7 @@ def start_memory_tracing(
 def stop_memory_tracing(
     memory_trace: Optional[MemoryTrace] = None, ignore_released_memory: bool = True
 ) -> Optional[MemorySummary]:
-    """ Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
+    """Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.

     Args:
         - `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary

@@ -499,15 +499,19 @@ def stop_memory_tracing(
         cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])

-        for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip(
-            memory_trace[:-1], memory_trace[1:]
-        ):
+        for (
+            (frame, cpu_mem, gpu_mem),
+            (next_frame, next_cpu_mem, next_gpu_mem),
+        ) in zip(memory_trace[:-1], memory_trace[1:]):
             cpu_mem_inc = next_cpu_mem - cpu_mem
             gpu_mem_inc = next_gpu_mem - gpu_mem
             cpu_gpu_mem_inc = cpu_mem_inc + gpu_mem_inc

             memory_diff_trace.append(
                 MemoryState(
-                    frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
+                    frame=frame,
+                    cpu=Memory(cpu_mem_inc),
+                    gpu=Memory(gpu_mem_inc),
+                    cpu_gpu=Memory(cpu_gpu_mem_inc),
                 )
             )
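The reshaped for header above is the standard pairwise idiom: zipping a sequence against itself shifted by one yields (state, next_state) pairs, from which per-line increments follow. In miniature:

# Miniature of the pairwise-increment computation above, with plain integers
# standing in for UsedMemoryState entries.
trace = [100, 130, 125, 160]  # memory readings taken *before* each traced line
increments = [nxt - cur for cur, nxt in zip(trace[:-1], trace[1:])]
print(increments)  # [30, -5, 35]; negative deltas are released memory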
@@ -529,7 +533,10 @@ def stop_memory_tracing(
         )

         # order by the total CPU + GPU memory increase
         cumulative_memory = list(
             MemoryState(
-                frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
+                frame=frame,
+                cpu=Memory(cpu_mem_inc),
+                gpu=Memory(gpu_mem_inc),
+                cpu_gpu=Memory(cpu_gpu_mem_inc),
             )
             for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
         )

@@ -544,15 +551,17 @@ def stop_memory_tracing(
         total_memory = Memory(total_memory)

         return MemorySummary(
-            sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory,
+            sequential=memory_diff_trace,
+            cumulative=cumulative_memory,
+            current=memory_curr_trace,
+            total=total_memory,
         )

     return None


 def bytes_to_mega_bytes(memory_amount: int) -> int:
-    """ Utility to convert a number of bytes (int) into a number of mega bytes (int)
-    """
+    """Utility to convert a number of bytes (int) into a number of mega bytes (int)"""
     return memory_amount >> 20
(The diffs for the remaining configuration_*.py files on this page — albert, bart, bert, ctrl, distilbert, dpr, electra, encoder_decoder, gpt2, longformer, mobilebert, openai — were not expanded in this capture.)
src/transformers/configuration_auto.py

@@ -73,30 +73,99 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
 CONFIG_MAPPING = OrderedDict(
     [
-        ("retribert", RetriBertConfig,),
-        ("t5", T5Config,),
-        ("mobilebert", MobileBertConfig,),
-        ("distilbert", DistilBertConfig,),
-        ("albert", AlbertConfig,),
-        ("camembert", CamembertConfig,),
-        ("xlm-roberta", XLMRobertaConfig,),
+        (
+            "retribert",
+            RetriBertConfig,
+        ),
+        (
+            "t5",
+            T5Config,
+        ),
+        (
+            "mobilebert",
+            MobileBertConfig,
+        ),
+        (
+            "distilbert",
+            DistilBertConfig,
+        ),
+        (
+            "albert",
+            AlbertConfig,
+        ),
+        (
+            "camembert",
+            CamembertConfig,
+        ),
+        (
+            "xlm-roberta",
+            XLMRobertaConfig,
+        ),
         ("pegasus", PegasusConfig),
-        ("marian", MarianConfig,),
-        ("mbart", MBartConfig,),
-        ("bart", BartConfig,),
-        ("reformer", ReformerConfig,),
-        ("longformer", LongformerConfig,),
-        ("roberta", RobertaConfig,),
-        ("flaubert", FlaubertConfig,),
-        ("bert", BertConfig,),
-        ("openai-gpt", OpenAIGPTConfig,),
-        ("gpt2", GPT2Config,),
-        ("transfo-xl", TransfoXLConfig,),
-        ("xlnet", XLNetConfig,),
-        ("xlm", XLMConfig,),
-        ("ctrl", CTRLConfig,),
-        ("electra", ElectraConfig,),
-        ("encoder-decoder", EncoderDecoderConfig,),
+        (
+            "marian",
+            MarianConfig,
+        ),
+        (
+            "mbart",
+            MBartConfig,
+        ),
+        (
+            "bart",
+            BartConfig,
+        ),
+        (
+            "reformer",
+            ReformerConfig,
+        ),
+        (
+            "longformer",
+            LongformerConfig,
+        ),
+        (
+            "roberta",
+            RobertaConfig,
+        ),
+        (
+            "flaubert",
+            FlaubertConfig,
+        ),
+        (
+            "bert",
+            BertConfig,
+        ),
+        (
+            "openai-gpt",
+            OpenAIGPTConfig,
+        ),
+        (
+            "gpt2",
+            GPT2Config,
+        ),
+        (
+            "transfo-xl",
+            TransfoXLConfig,
+        ),
+        (
+            "xlnet",
+            XLNetConfig,
+        ),
+        (
+            "xlm",
+            XLMConfig,
+        ),
+        (
+            "ctrl",
+            CTRLConfig,
+        ),
+        (
+            "electra",
+            ElectraConfig,
+        ),
+        (
+            "encoder-decoder",
+            EncoderDecoderConfig,
+        ),
     ]
 )
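A note on why CONFIG_MAPPING is an OrderedDict rather than a plain dict: in this era of the library, AutoConfig-style lookup scans the mapping in order and matches the first key found inside the model name, so longer keys such as "xlm-roberta" must precede their substrings ("roberta"). A hedged sketch of that lookup (the helper below is illustrative, not the library's code):

# Illustrative sketch (not the library's implementation) of ordered,
# substring-based config dispatch, which is why entry order in CONFIG_MAPPING
# is significant: "xlm-roberta" must be tested before "roberta".
from collections import OrderedDict

DEMO_MAPPING = OrderedDict(
    [
        ("xlm-roberta", "XLMRobertaConfig"),
        ("roberta", "RobertaConfig"),
        ("bert", "BertConfig"),
    ]
)


def pick_config(model_name: str) -> str:
    for pattern, config_class in DEMO_MAPPING.items():
        if pattern in model_name:
            return config_class
    raise ValueError(f"no config matches {model_name!r}")


print(pick_config("xlm-roberta-base"))  # XLMRobertaConfig
print(pick_config("roberta-large"))     # RobertaConfig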
src/transformers/configuration_flaubert.py

@@ -143,8 +143,7 @@ class FlaubertConfig(XLMConfig):
     model_type = "flaubert"

     def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
-        """Constructs FlaubertConfig.
-        """
+        """Constructs FlaubertConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
         self.layerdrop = layerdrop
         self.pre_norm = pre_norm
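The hunk shows the standard config-inheritance pattern: FlaubertConfig adds two fields and forwards everything else to XLMConfig through **kwargs. A self-contained sketch of the same shape (the base class below is a stand-in for XLMConfig, not the real one):

# Stand-in sketch of the inheritance pattern in configuration_flaubert.py:
# the subclass overrides a couple of token-id defaults, stores its own fields,
# and passes the rest up via **kwargs.
class StandInXLMConfig:
    def __init__(self, pad_token_id=0, bos_token_id=1, **kwargs):
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        for key, value in kwargs.items():
            setattr(self, key, value)


class StandInFlaubertConfig(StandInXLMConfig):
    def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
        super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
        self.layerdrop = layerdrop
        self.pre_norm = pre_norm


cfg = StandInFlaubertConfig(pre_norm=True, emb_dim=2048)
print(cfg.pad_token_id, cfg.pre_norm, cfg.emb_dim)  # 2 True 2048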