Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
a75c64d8
Commit
a75c64d8
authored
Aug 26, 2020
by
Lysandre
Browse files
Black 20 release
parent
e78c1103
Changes
191
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
969 additions
and
879 deletions
+969
-879
examples/token-classification/utils_ner.py
examples/token-classification/utils_ner.py
+7
-6
src/transformers/activations.py
src/transformers/activations.py
+7
-7
src/transformers/benchmark/benchmark.py
src/transformers/benchmark/benchmark.py
+8
-2
src/transformers/benchmark/benchmark_args_tf.py
src/transformers/benchmark/benchmark_args_tf.py
+4
-2
src/transformers/benchmark/benchmark_tf.py
src/transformers/benchmark/benchmark_tf.py
+5
-1
src/transformers/benchmark/benchmark_utils.py
src/transformers/benchmark/benchmark_utils.py
+143
-134
src/transformers/configuration_albert.py
src/transformers/configuration_albert.py
+65
-65
src/transformers/configuration_auto.py
src/transformers/configuration_auto.py
+98
-29
src/transformers/configuration_bart.py
src/transformers/configuration_bart.py
+6
-6
src/transformers/configuration_bert.py
src/transformers/configuration_bert.py
+53
-53
src/transformers/configuration_ctrl.py
src/transformers/configuration_ctrl.py
+49
-49
src/transformers/configuration_distilbert.py
src/transformers/configuration_distilbert.py
+55
-55
src/transformers/configuration_dpr.py
src/transformers/configuration_dpr.py
+8
-8
src/transformers/configuration_electra.py
src/transformers/configuration_electra.py
+76
-76
src/transformers/configuration_encoder_decoder.py
src/transformers/configuration_encoder_decoder.py
+31
-31
src/transformers/configuration_flaubert.py
src/transformers/configuration_flaubert.py
+106
-107
src/transformers/configuration_gpt2.py
src/transformers/configuration_gpt2.py
+78
-78
src/transformers/configuration_longformer.py
src/transformers/configuration_longformer.py
+19
-19
src/transformers/configuration_mobilebert.py
src/transformers/configuration_mobilebert.py
+73
-73
src/transformers/configuration_openai.py
src/transformers/configuration_openai.py
+78
-78
No files found.
examples/token-classification/utils_ner.py
View file @
a75c64d8
...
@@ -90,7 +90,7 @@ class TokenClassificationTask:
...
@@ -90,7 +90,7 @@ class TokenClassificationTask:
sequence_a_segment_id
=
0
,
sequence_a_segment_id
=
0
,
mask_padding_with_zero
=
True
,
mask_padding_with_zero
=
True
,
)
->
List
[
InputFeatures
]:
)
->
List
[
InputFeatures
]:
"""
Loads a data file into a list of `InputFeatures`
"""Loads a data file into a list of `InputFeatures`
`cls_token_at_end` define the location of the CLS token:
`cls_token_at_end` define the location of the CLS token:
- False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
- False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
- True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
- True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
...
@@ -230,7 +230,8 @@ if is_torch_available():
...
@@ -230,7 +230,8 @@ if is_torch_available():
):
):
# Load data features from cache or dataset file
# Load data features from cache or dataset file
cached_features_file
=
os
.
path
.
join
(
cached_features_file
=
os
.
path
.
join
(
data_dir
,
"cached_{}_{}_{}"
.
format
(
mode
.
value
,
tokenizer
.
__class__
.
__name__
,
str
(
max_seq_length
)),
data_dir
,
"cached_{}_{}_{}"
.
format
(
mode
.
value
,
tokenizer
.
__class__
.
__name__
,
str
(
max_seq_length
)),
)
)
# Make sure only the first process in distributed training processes the dataset,
# Make sure only the first process in distributed training processes the dataset,
...
...
src/transformers/activations.py
View file @
a75c64d8
...
@@ -14,7 +14,7 @@ def swish(x):
...
@@ -14,7 +14,7 @@ def swish(x):
def
_gelu_python
(
x
):
def
_gelu_python
(
x
):
"""
Original Implementation of the gelu activation function in Google Bert repo when initially created.
"""Original Implementation of the gelu activation function in Google Bert repo when initially created.
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
This is now written in C in torch.nn.functional
This is now written in C in torch.nn.functional
...
@@ -24,7 +24,7 @@ def _gelu_python(x):
...
@@ -24,7 +24,7 @@ def _gelu_python(x):
def
gelu_new
(
x
):
def
gelu_new
(
x
):
"""
Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
"""Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT).
Also see https://arxiv.org/abs/1606.08415
Also see https://arxiv.org/abs/1606.08415
"""
"""
return
0.5
*
x
*
(
1.0
+
torch
.
tanh
(
math
.
sqrt
(
2.0
/
math
.
pi
)
*
(
x
+
0.044715
*
torch
.
pow
(
x
,
3.0
))))
return
0.5
*
x
*
(
1.0
+
torch
.
tanh
(
math
.
sqrt
(
2.0
/
math
.
pi
)
*
(
x
+
0.044715
*
torch
.
pow
(
x
,
3.0
))))
...
...
src/transformers/benchmark/benchmark.py
View file @
a75c64d8
...
@@ -199,11 +199,17 @@ class PyTorchBenchmark(Benchmark):
...
@@ -199,11 +199,17 @@ class PyTorchBenchmark(Benchmark):
# run additional 10 times to stabilize compilation for tpu and torchscript
# run additional 10 times to stabilize compilation for tpu and torchscript
logger
.
info
(
"Do inference on TPU or torchscript. Running model 5 times to stabilize compilation"
)
logger
.
info
(
"Do inference on TPU or torchscript. Running model 5 times to stabilize compilation"
)
timeit
.
repeat
(
timeit
.
repeat
(
func
,
repeat
=
1
,
number
=
5
,
func
,
repeat
=
1
,
number
=
5
,
)
)
# as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
# as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
runtimes
=
timeit
.
repeat
(
func
,
repeat
=
self
.
args
.
repeat
,
number
=
10
,)
runtimes
=
timeit
.
repeat
(
func
,
repeat
=
self
.
args
.
repeat
,
number
=
10
,
)
if
self
.
args
.
is_tpu
and
self
.
args
.
torch_xla_tpu_print_metrics
:
if
self
.
args
.
is_tpu
and
self
.
args
.
torch_xla_tpu_print_metrics
:
import
torch_xla.debug.metrics
as
met
import
torch_xla.debug.metrics
as
met
...
...
src/transformers/benchmark/benchmark_args_tf.py
View file @
a75c64d8
...
@@ -32,10 +32,12 @@ logger = logging.get_logger(__name__)
...
@@ -32,10 +32,12 @@ logger = logging.get_logger(__name__)
@
dataclass
@
dataclass
class
TensorFlowBenchmarkArguments
(
BenchmarkArguments
):
class
TensorFlowBenchmarkArguments
(
BenchmarkArguments
):
tpu_name
:
str
=
field
(
tpu_name
:
str
=
field
(
default
=
None
,
metadata
=
{
"help"
:
"Name of TPU"
},
default
=
None
,
metadata
=
{
"help"
:
"Name of TPU"
},
)
)
device_idx
:
int
=
field
(
device_idx
:
int
=
field
(
default
=
0
,
metadata
=
{
"help"
:
"CPU / GPU device index. Defaults to 0."
},
default
=
0
,
metadata
=
{
"help"
:
"CPU / GPU device index. Defaults to 0."
},
)
)
eager_mode
:
bool
=
field
(
default
=
False
,
metadata
=
{
"help"
:
"Benchmark models in eager model."
})
eager_mode
:
bool
=
field
(
default
=
False
,
metadata
=
{
"help"
:
"Benchmark models in eager model."
})
use_xla
:
bool
=
field
(
use_xla
:
bool
=
field
(
...
...
src/transformers/benchmark/benchmark_tf.py
View file @
a75c64d8
...
@@ -219,7 +219,11 @@ class TensorFlowBenchmark(Benchmark):
...
@@ -219,7 +219,11 @@ class TensorFlowBenchmark(Benchmark):
timeit
.
repeat
(
func
,
repeat
=
1
,
number
=
5
)
timeit
.
repeat
(
func
,
repeat
=
1
,
number
=
5
)
# as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
# as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
runtimes
=
timeit
.
repeat
(
func
,
repeat
=
self
.
args
.
repeat
,
number
=
10
,)
runtimes
=
timeit
.
repeat
(
func
,
repeat
=
self
.
args
.
repeat
,
number
=
10
,
)
return
min
(
runtimes
)
/
10.0
return
min
(
runtimes
)
/
10.0
except
ResourceExhaustedError
as
e
:
except
ResourceExhaustedError
as
e
:
...
...
src/transformers/benchmark/benchmark_utils.py
View file @
a75c64d8
...
@@ -106,7 +106,7 @@ def is_memory_tracing_enabled():
...
@@ -106,7 +106,7 @@ def is_memory_tracing_enabled():
class
Frame
(
NamedTuple
):
class
Frame
(
NamedTuple
):
"""
`Frame` is a NamedTuple used to gather the current frame state.
"""`Frame` is a NamedTuple used to gather the current frame state.
`Frame` has the following fields:
`Frame` has the following fields:
- 'filename' (string): Name of the file currently executed
- 'filename' (string): Name of the file currently executed
- 'module' (string): Name of the module currently executed
- 'module' (string): Name of the module currently executed
...
@@ -123,7 +123,7 @@ class Frame(NamedTuple):
...
@@ -123,7 +123,7 @@ class Frame(NamedTuple):
class
UsedMemoryState
(
NamedTuple
):
class
UsedMemoryState
(
NamedTuple
):
"""
`UsedMemoryState` are named tuples with the following fields:
"""`UsedMemoryState` are named tuples with the following fields:
- 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
- 'frame': a `Frame` namedtuple (see below) storing information on the current tracing frame (current file, location in current file)
- 'cpu_memory': CPU RSS memory state *before* executing the line
- 'cpu_memory': CPU RSS memory state *before* executing the line
- 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)
- 'gpu_memory': GPU used memory *before* executing the line (sum for all GPUs or for only `gpus_to_trace` if provided)
...
@@ -135,7 +135,7 @@ class UsedMemoryState(NamedTuple):
...
@@ -135,7 +135,7 @@ class UsedMemoryState(NamedTuple):
class
Memory
(
NamedTuple
):
class
Memory
(
NamedTuple
):
"""
`Memory` NamedTuple have a single field `bytes` and
"""`Memory` NamedTuple have a single field `bytes` and
you can get a human readable str of the number of mega bytes by calling `__repr__`
you can get a human readable str of the number of mega bytes by calling `__repr__`
- `byte` (integer): number of bytes,
- `byte` (integer): number of bytes,
"""
"""
...
@@ -147,7 +147,7 @@ class Memory(NamedTuple):
...
@@ -147,7 +147,7 @@ class Memory(NamedTuple):
class
MemoryState
(
NamedTuple
):
class
MemoryState
(
NamedTuple
):
"""
`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
"""`MemoryState` are namedtuples listing frame + CPU/GPU memory with the following fields:
- `frame` (`Frame`): the current frame (see above)
- `frame` (`Frame`): the current frame (see above)
- `cpu`: CPU memory consumed at during the current frame as a `Memory` named tuple
- `cpu`: CPU memory consumed at during the current frame as a `Memory` named tuple
- `gpu`: GPU memory consumed at during the current frame as a `Memory` named tuple
- `gpu`: GPU memory consumed at during the current frame as a `Memory` named tuple
...
@@ -161,7 +161,7 @@ class MemoryState(NamedTuple):
...
@@ -161,7 +161,7 @@ class MemoryState(NamedTuple):
class
MemorySummary
(
NamedTuple
):
class
MemorySummary
(
NamedTuple
):
"""
`MemorySummary` namedtuple otherwise with the fields:
"""`MemorySummary` namedtuple otherwise with the fields:
- `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
- `sequential`: a list of `MemoryState` namedtuple (see below) computed from the provided `memory_trace`
by substracting the memory after executing each line from the memory before executing said line.
by substracting the memory after executing each line from the memory before executing said line.
- `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
- `cumulative`: a list of `MemoryState` namedtuple (see below) with cumulative increase in memory for each line
...
@@ -309,7 +309,7 @@ def start_memory_tracing(
...
@@ -309,7 +309,7 @@ def start_memory_tracing(
events_to_trace
:
str
=
"line"
,
events_to_trace
:
str
=
"line"
,
gpus_to_trace
:
Optional
[
List
[
int
]]
=
None
,
gpus_to_trace
:
Optional
[
List
[
int
]]
=
None
,
)
->
MemoryTrace
:
)
->
MemoryTrace
:
"""
Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
"""Setup line-by-line tracing to record rss mem (RAM) at each line of a module or sub-module.
See `./benchmark.py` for usage examples.
See `./benchmark.py` for usage examples.
Current memory consumption is returned using psutil and in particular is the RSS memory
Current memory consumption is returned using psutil and in particular is the RSS memory
"Resident Set Size” (the non-swapped physical memory the process is using).
"Resident Set Size” (the non-swapped physical memory the process is using).
...
@@ -371,7 +371,7 @@ def start_memory_tracing(
...
@@ -371,7 +371,7 @@ def start_memory_tracing(
memory_trace
=
[]
memory_trace
=
[]
def
traceit
(
frame
,
event
,
args
):
def
traceit
(
frame
,
event
,
args
):
"""
Tracing method executed before running each line in a module or sub-module
"""Tracing method executed before running each line in a module or sub-module
Record memory allocated in a list with debugging information
Record memory allocated in a list with debugging information
"""
"""
global
_is_memory_tracing_enabled
global
_is_memory_tracing_enabled
...
@@ -456,7 +456,7 @@ def start_memory_tracing(
...
@@ -456,7 +456,7 @@ def start_memory_tracing(
def
stop_memory_tracing
(
def
stop_memory_tracing
(
memory_trace
:
Optional
[
MemoryTrace
]
=
None
,
ignore_released_memory
:
bool
=
True
memory_trace
:
Optional
[
MemoryTrace
]
=
None
,
ignore_released_memory
:
bool
=
True
)
->
Optional
[
MemorySummary
]:
)
->
Optional
[
MemorySummary
]:
"""
Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
"""Stop memory tracing cleanly and return a summary of the memory trace if a trace is given.
Args:
Args:
- `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary
- `memory_trace` (optional output of start_memory_tracing, default: None): memory trace to convert in summary
...
@@ -499,15 +499,19 @@ def stop_memory_tracing(
...
@@ -499,15 +499,19 @@ def stop_memory_tracing(
cumulative_memory_dict
=
defaultdict
(
lambda
:
[
0
,
0
,
0
])
cumulative_memory_dict
=
defaultdict
(
lambda
:
[
0
,
0
,
0
])
for
((
frame
,
cpu_mem
,
gpu_mem
),
(
next_frame
,
next_cpu_mem
,
next_gpu_mem
),)
in
zip
(
for
(
memory_trace
[:
-
1
],
memory_trace
[
1
:]
(
frame
,
cpu_mem
,
gpu_mem
),
):
(
next_frame
,
next_cpu_mem
,
next_gpu_mem
),
)
in
zip
(
memory_trace
[:
-
1
],
memory_trace
[
1
:]):
cpu_mem_inc
=
next_cpu_mem
-
cpu_mem
cpu_mem_inc
=
next_cpu_mem
-
cpu_mem
gpu_mem_inc
=
next_gpu_mem
-
gpu_mem
gpu_mem_inc
=
next_gpu_mem
-
gpu_mem
cpu_gpu_mem_inc
=
cpu_mem_inc
+
gpu_mem_inc
cpu_gpu_mem_inc
=
cpu_mem_inc
+
gpu_mem_inc
memory_diff_trace
.
append
(
memory_diff_trace
.
append
(
MemoryState
(
MemoryState
(
frame
=
frame
,
cpu
=
Memory
(
cpu_mem_inc
),
gpu
=
Memory
(
gpu_mem_inc
),
cpu_gpu
=
Memory
(
cpu_gpu_mem_inc
),
frame
=
frame
,
cpu
=
Memory
(
cpu_mem_inc
),
gpu
=
Memory
(
gpu_mem_inc
),
cpu_gpu
=
Memory
(
cpu_gpu_mem_inc
),
)
)
)
)
...
@@ -529,7 +533,10 @@ def stop_memory_tracing(
...
@@ -529,7 +533,10 @@ def stop_memory_tracing(
)
# order by the total CPU + GPU memory increase
)
# order by the total CPU + GPU memory increase
cumulative_memory
=
list
(
cumulative_memory
=
list
(
MemoryState
(
MemoryState
(
frame
=
frame
,
cpu
=
Memory
(
cpu_mem_inc
),
gpu
=
Memory
(
gpu_mem_inc
),
cpu_gpu
=
Memory
(
cpu_gpu_mem_inc
),
frame
=
frame
,
cpu
=
Memory
(
cpu_mem_inc
),
gpu
=
Memory
(
gpu_mem_inc
),
cpu_gpu
=
Memory
(
cpu_gpu_mem_inc
),
)
)
for
frame
,
(
cpu_mem_inc
,
gpu_mem_inc
,
cpu_gpu_mem_inc
)
in
cumulative_memory
for
frame
,
(
cpu_mem_inc
,
gpu_mem_inc
,
cpu_gpu_mem_inc
)
in
cumulative_memory
)
)
...
@@ -544,15 +551,17 @@ def stop_memory_tracing(
...
@@ -544,15 +551,17 @@ def stop_memory_tracing(
total_memory
=
Memory
(
total_memory
)
total_memory
=
Memory
(
total_memory
)
return
MemorySummary
(
return
MemorySummary
(
sequential
=
memory_diff_trace
,
cumulative
=
cumulative_memory
,
current
=
memory_curr_trace
,
total
=
total_memory
,
sequential
=
memory_diff_trace
,
cumulative
=
cumulative_memory
,
current
=
memory_curr_trace
,
total
=
total_memory
,
)
)
return
None
return
None
def
bytes_to_mega_bytes
(
memory_amount
:
int
)
->
int
:
def
bytes_to_mega_bytes
(
memory_amount
:
int
)
->
int
:
""" Utility to convert a number of bytes (int) into a number of mega bytes (int)
"""Utility to convert a number of bytes (int) into a number of mega bytes (int)"""
"""
return
memory_amount
>>
20
return
memory_amount
>>
20
...
...
src/transformers/configuration_albert.py
View file @
a75c64d8
src/transformers/configuration_auto.py
View file @
a75c64d8
...
@@ -73,30 +73,99 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
...
@@ -73,30 +73,99 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
CONFIG_MAPPING
=
OrderedDict
(
CONFIG_MAPPING
=
OrderedDict
(
[
[
(
"retribert"
,
RetriBertConfig
,),
(
(
"t5"
,
T5Config
,),
"retribert"
,
(
"mobilebert"
,
MobileBertConfig
,),
RetriBertConfig
,
(
"distilbert"
,
DistilBertConfig
,),
),
(
"albert"
,
AlbertConfig
,),
(
(
"camembert"
,
CamembertConfig
,),
"t5"
,
(
"xlm-roberta"
,
XLMRobertaConfig
,),
T5Config
,
),
(
"mobilebert"
,
MobileBertConfig
,
),
(
"distilbert"
,
DistilBertConfig
,
),
(
"albert"
,
AlbertConfig
,
),
(
"camembert"
,
CamembertConfig
,
),
(
"xlm-roberta"
,
XLMRobertaConfig
,
),
(
"pegasus"
,
PegasusConfig
),
(
"pegasus"
,
PegasusConfig
),
(
"marian"
,
MarianConfig
,),
(
(
"mbart"
,
MBartConfig
,),
"marian"
,
(
"bart"
,
BartConfig
,),
MarianConfig
,
(
"reformer"
,
ReformerConfig
,),
),
(
"longformer"
,
LongformerConfig
,),
(
(
"roberta"
,
RobertaConfig
,),
"mbart"
,
(
"flaubert"
,
FlaubertConfig
,),
MBartConfig
,
(
"bert"
,
BertConfig
,),
),
(
"openai-gpt"
,
OpenAIGPTConfig
,),
(
(
"gpt2"
,
GPT2Config
,),
"bart"
,
(
"transfo-xl"
,
TransfoXLConfig
,),
BartConfig
,
(
"xlnet"
,
XLNetConfig
,),
),
(
"xlm"
,
XLMConfig
,),
(
(
"ctrl"
,
CTRLConfig
,),
"reformer"
,
(
"electra"
,
ElectraConfig
,),
ReformerConfig
,
(
"encoder-decoder"
,
EncoderDecoderConfig
,),
),
(
"longformer"
,
LongformerConfig
,
),
(
"roberta"
,
RobertaConfig
,
),
(
"flaubert"
,
FlaubertConfig
,
),
(
"bert"
,
BertConfig
,
),
(
"openai-gpt"
,
OpenAIGPTConfig
,
),
(
"gpt2"
,
GPT2Config
,
),
(
"transfo-xl"
,
TransfoXLConfig
,
),
(
"xlnet"
,
XLNetConfig
,
),
(
"xlm"
,
XLMConfig
,
),
(
"ctrl"
,
CTRLConfig
,
),
(
"electra"
,
ElectraConfig
,
),
(
"encoder-decoder"
,
EncoderDecoderConfig
,
),
]
]
)
)
...
...
src/transformers/configuration_bart.py
View file @
a75c64d8
src/transformers/configuration_bert.py
View file @
a75c64d8
src/transformers/configuration_ctrl.py
View file @
a75c64d8
src/transformers/configuration_distilbert.py
View file @
a75c64d8
src/transformers/configuration_dpr.py
View file @
a75c64d8
src/transformers/configuration_electra.py
View file @
a75c64d8
src/transformers/configuration_encoder_decoder.py
View file @
a75c64d8
src/transformers/configuration_flaubert.py
View file @
a75c64d8
...
@@ -143,8 +143,7 @@ class FlaubertConfig(XLMConfig):
...
@@ -143,8 +143,7 @@ class FlaubertConfig(XLMConfig):
model_type
=
"flaubert"
model_type
=
"flaubert"
def
__init__
(
self
,
layerdrop
=
0.0
,
pre_norm
=
False
,
pad_token_id
=
2
,
bos_token_id
=
0
,
**
kwargs
):
def
__init__
(
self
,
layerdrop
=
0.0
,
pre_norm
=
False
,
pad_token_id
=
2
,
bos_token_id
=
0
,
**
kwargs
):
"""Constructs FlaubertConfig.
"""Constructs FlaubertConfig."""
"""
super
().
__init__
(
pad_token_id
=
pad_token_id
,
bos_token_id
=
bos_token_id
,
**
kwargs
)
super
().
__init__
(
pad_token_id
=
pad_token_id
,
bos_token_id
=
bos_token_id
,
**
kwargs
)
self
.
layerdrop
=
layerdrop
self
.
layerdrop
=
layerdrop
self
.
pre_norm
=
pre_norm
self
.
pre_norm
=
pre_norm
src/transformers/configuration_gpt2.py
View file @
a75c64d8
src/transformers/configuration_longformer.py
View file @
a75c64d8
src/transformers/configuration_mobilebert.py
View file @
a75c64d8
src/transformers/configuration_openai.py
View file @
a75c64d8
Prev
1
2
3
4
5
6
7
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment