chenpangpang / transformers · Commits · dc17f2a1
"tests/test_tokenization_phobert.py" did not exist on "5dd7b677adbd2a228328e42b79583143c16b8dff"
Commit dc17f2a1 (signature unverified), authored Jan 16, 2020 by Thomas Wolf; committed by GitHub, Jan 16, 2020
Merge pull request #2538 from huggingface/py3_super

💄 super

Parents: 88085484, a98b2ca8
Changes: showing 20 of 75 changed files, with 71 additions and 71 deletions (+71 -71)
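Every hunk in this commit makes the same mechanical change: the Python 2 compatible two-argument call super(ClassName, self) becomes the zero-argument super() that Python 3 allows. A minimal sketch of the pattern (the class and attribute names here are illustrative, not taken from the diff):

    import torch.nn as nn

    class MyModule(nn.Module):
        def __init__(self, hidden_size):
            # Old, Python 2 style: name the class and instance explicitly.
            # super(MyModule, self).__init__()
            # New, Python 3 style: the compiler supplies __class__ and self.
            super().__init__()
            self.dense = nn.Linear(hidden_size, hidden_size)

Both spellings resolve to the same method through the MRO; the zero-argument form is shorter and keeps working if the class is renamed.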
examples/mm-imdb/utils_mmimdb.py                  +1  -1
examples/pplm/pplm_classification_head.py         +1  -1
examples/pplm/run_pplm_discrim_train.py           +1  -1
examples/summarization/configuration_bertabs.py   +1  -1
examples/summarization/modeling_bertabs.py        +7  -7
src/transformers/configuration_albert.py          +1  -1
src/transformers/configuration_bert.py            +1  -1
src/transformers/configuration_ctrl.py            +1  -1
src/transformers/configuration_distilbert.py      +1  -1
src/transformers/configuration_gpt2.py            +1  -1
src/transformers/configuration_openai.py          +1  -1
src/transformers/configuration_t5.py              +1  -1
src/transformers/configuration_transfo_xl.py      +1  -1
src/transformers/configuration_xlm.py             +1  -1
src/transformers/configuration_xlnet.py           +1  -1
src/transformers/modeling_albert.py               +10 -10
src/transformers/modeling_bert.py                 +22 -22
src/transformers/modeling_ctrl.py                 +4  -4
src/transformers/modeling_distilbert.py           +10 -10
src/transformers/modeling_encoder_decoder.py      +4  -4
examples/mm-imdb/utils_mmimdb.py

@@ -31,7 +31,7 @@ POOLING_BREAKDOWN = {1: (1, 1), 2: (2, 1), 3: (3, 1), 4: (2, 2), 5: (5, 1), 6: (
 class ImageEncoder(nn.Module):
     def __init__(self, args):
-        super(ImageEncoder, self).__init__()
+        super().__init__()
         model = torchvision.models.resnet152(pretrained=True)
         modules = list(model.children())[:-2]
         self.model = nn.Sequential(*modules)
examples/pplm/pplm_classification_head.py

@@ -5,7 +5,7 @@ class ClassificationHead(torch.nn.Module):
     """Classification Head for transformer encoders"""

     def __init__(self, class_size, embed_size):
-        super(ClassificationHead, self).__init__()
+        super().__init__()
         self.class_size = class_size
         self.embed_size = embed_size
         # self.mlp1 = torch.nn.Linear(embed_size, embed_size)
examples/pplm/run_pplm_discrim_train.py

@@ -46,7 +46,7 @@ class Discriminator(torch.nn.Module):
     """Transformer encoder followed by a Classification Head"""

     def __init__(self, class_size, pretrained_model="gpt2-medium", cached_mode=False, device="cpu"):
-        super(Discriminator, self).__init__()
+        super().__init__()
         self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
         self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
         self.embed_size = self.encoder.transformer.config.hidden_size
examples/summarization/configuration_bertabs.py

@@ -80,7 +80,7 @@ class BertAbsConfig(PretrainedConfig):
         dec_dropout=0.2,
         **kwargs,
     ):
-        super(BertAbsConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.max_pos = max_pos
examples/summarization/modeling_bertabs.py

@@ -47,7 +47,7 @@ class BertAbsPreTrainedModel(PreTrainedModel):
 class BertAbs(BertAbsPreTrainedModel):
     def __init__(self, args, checkpoint=None, bert_extractive_checkpoint=None):
-        super(BertAbs, self).__init__(args)
+        super().__init__(args)
         self.args = args
         self.bert = Bert()

@@ -122,7 +122,7 @@ class Bert(nn.Module):
     """

     def __init__(self):
-        super(Bert, self).__init__()
+        super().__init__()
         config = BertConfig.from_pretrained("bert-base-uncased")
         self.model = BertModel(config)

@@ -151,7 +151,7 @@ class TransformerDecoder(nn.Module):
     """

     def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, vocab_size):
-        super(TransformerDecoder, self).__init__()
+        super().__init__()
         # Basic attributes.
         self.decoder_type = "transformer"

@@ -261,7 +261,7 @@ class PositionalEncoding(nn.Module):
         pe[:, 0::2] = torch.sin(position.float() * div_term)
         pe[:, 1::2] = torch.cos(position.float() * div_term)
         pe = pe.unsqueeze(0)
-        super(PositionalEncoding, self).__init__()
+        super().__init__()
         self.register_buffer("pe", pe)
         self.dropout = nn.Dropout(p=dropout)
         self.dim = dim

@@ -293,7 +293,7 @@ class TransformerDecoderLayer(nn.Module):
     """

     def __init__(self, d_model, heads, d_ff, dropout):
-        super(TransformerDecoderLayer, self).__init__()
+        super().__init__()
         self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)

@@ -410,7 +410,7 @@ class MultiHeadedAttention(nn.Module):
         self.dim_per_head = model_dim // head_count
         self.model_dim = model_dim
-        super(MultiHeadedAttention, self).__init__()
+        super().__init__()
         self.head_count = head_count
         self.linear_keys = nn.Linear(model_dim, head_count * self.dim_per_head)

@@ -639,7 +639,7 @@ class PositionwiseFeedForward(nn.Module):
     """

     def __init__(self, d_model, d_ff, dropout=0.1):
-        super(PositionwiseFeedForward, self).__init__()
+        super().__init__()
         self.w_1 = nn.Linear(d_model, d_ff)
         self.w_2 = nn.Linear(d_ff, d_model)
         self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
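Note that in PositionalEncoding and MultiHeadedAttention above the super() call is not the first statement of __init__. The zero-argument form still works there, because it is resolved from the enclosing class (via the implicit __class__ cell) and the method's first argument, not from the position of the call. A small sketch of the same shape, using hypothetical names:

    import torch
    import torch.nn as nn

    class SinusoidalTable(nn.Module):
        def __init__(self, dim, max_len=512):
            # Tensors can be prepared before the parent constructor runs...
            position = torch.arange(max_len).unsqueeze(1).float()
            table = torch.zeros(max_len, dim) + position
            super().__init__()
            # ...but register_buffer requires nn.Module.__init__ to have run first.
            self.register_buffer("table", table)

The only ordering requirement is that nn.Module.__init__ executes before buffers, parameters, or submodules are assigned to self.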
src/transformers/configuration_albert.py

@@ -122,7 +122,7 @@ class AlbertConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         **kwargs
     ):
-        super(AlbertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.embedding_size = embedding_size
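All of the configuration classes in this commit share the same constructor shape: model-specific hyperparameters become attributes, and everything else is forwarded to PretrainedConfig through **kwargs. A rough sketch of that shape with an invented config class (the names are illustrative only):

    from transformers import PretrainedConfig

    class ToyConfig(PretrainedConfig):
        def __init__(self, vocab_size=1000, hidden_size=64, **kwargs):
            # Forward shared options (e.g. output_attentions) to the base class.
            super().__init__(**kwargs)
            # Keep the model-specific hyperparameters on the instance.
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size

The migration leaves the **kwargs forwarding untouched; only the spelling of the super call changes, as the hunks below repeat for every configuration file.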
src/transformers/configuration_bert.py

@@ -125,7 +125,7 @@ class BertConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         **kwargs
     ):
-        super(BertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
src/transformers/configuration_ctrl.py

@@ -106,7 +106,7 @@ class CTRLConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(CTRLConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
src/transformers/configuration_distilbert.py

@@ -113,7 +113,7 @@ class DistilBertConfig(PretrainedConfig):
         seq_classif_dropout=0.2,
         **kwargs
     ):
-        super(DistilBertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.sinusoidal_pos_embds = sinusoidal_pos_embds
src/transformers/configuration_gpt2.py

@@ -136,7 +136,7 @@ class GPT2Config(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(GPT2Config, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
src/transformers/configuration_openai.py

@@ -138,7 +138,7 @@ class OpenAIGPTConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(OpenAIGPTConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
src/transformers/configuration_t5.py

@@ -77,7 +77,7 @@ class T5Config(PretrainedConfig):
         initializer_factor=1.0,
         **kwargs
     ):
-        super(T5Config, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_positions = n_positions
         self.d_model = d_model
src/transformers/configuration_transfo_xl.py

@@ -151,7 +151,7 @@ class TransfoXLConfig(PretrainedConfig):
         layer_norm_epsilon=1e-5,
         **kwargs
     ):
-        super(TransfoXLConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.cutoffs = []
src/transformers/configuration_xlm.py

@@ -197,7 +197,7 @@ class XLMConfig(PretrainedConfig):
     ):
         """Constructs XLMConfig.
         """
-        super(XLMConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.emb_dim = emb_dim
         self.n_layers = n_layers
src/transformers/configuration_xlnet.py

@@ -159,7 +159,7 @@ class XLNetConfig(PretrainedConfig):
     ):
         """Constructs XLNetConfig.
         """
-        super(XLNetConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.d_model = d_model
         self.n_layer = n_layer
src/transformers/modeling_albert.py

@@ -167,7 +167,7 @@ class AlbertEmbeddings(BertEmbeddings):
     """

     def __init__(self, config):
-        super(AlbertEmbeddings, self).__init__(config)
+        super().__init__(config)
         self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size)

@@ -177,7 +177,7 @@ class AlbertEmbeddings(BertEmbeddings):
 class AlbertAttention(BertSelfAttention):
     def __init__(self, config):
-        super(AlbertAttention, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.num_attention_heads = config.num_attention_heads

@@ -258,7 +258,7 @@ class AlbertAttention(BertSelfAttention):
 class AlbertLayer(nn.Module):
     def __init__(self, config):
-        super(AlbertLayer, self).__init__()
+        super().__init__()
         self.config = config
         self.full_layer_layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

@@ -279,7 +279,7 @@ class AlbertLayer(nn.Module):
 class AlbertLayerGroup(nn.Module):
     def __init__(self, config):
-        super(AlbertLayerGroup, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -309,7 +309,7 @@ class AlbertLayerGroup(nn.Module):
 class AlbertTransformer(nn.Module):
     def __init__(self, config):
-        super(AlbertTransformer, self).__init__()
+        super().__init__()
         self.config = config
         self.output_attentions = config.output_attentions

@@ -471,7 +471,7 @@ class AlbertModel(AlbertPreTrainedModel):
     base_model_prefix = "albert"

     def __init__(self, config):
-        super(AlbertModel, self).__init__(config)
+        super().__init__(config)
         self.config = config
         self.embeddings = AlbertEmbeddings(config)

@@ -571,7 +571,7 @@ class AlbertModel(AlbertPreTrainedModel):
 class AlbertMLMHead(nn.Module):
     def __init__(self, config):
-        super(AlbertMLMHead, self).__init__()
+        super().__init__()
         self.LayerNorm = nn.LayerNorm(config.embedding_size)
         self.bias = nn.Parameter(torch.zeros(config.vocab_size))

@@ -619,7 +619,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
     """

     def __init__(self, config):
-        super(AlbertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.albert = AlbertModel(config)
         self.predictions = AlbertMLMHead(config)

@@ -706,7 +706,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
     """

     def __init__(self, config):
-        super(AlbertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.albert = AlbertModel(config)

@@ -804,7 +804,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
     """

     def __init__(self, config):
-        super(AlbertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.albert = AlbertModel(config)
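AlbertEmbeddings and AlbertAttention above inherit from the corresponding BERT modules, so super().__init__(config) first builds the parent's layers and the subclass then replaces the ones whose shapes differ (the embeddings use config.embedding_size where BERT uses config.hidden_size). A rough sketch of that override pattern, with simplified stand-in classes:

    import torch.nn as nn

    class BaseEmbeddings(nn.Module):
        def __init__(self, config):
            super().__init__()
            self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)

    class FactorizedEmbeddings(BaseEmbeddings):
        def __init__(self, config):
            # Build the parent's layers first...
            super().__init__(config)
            # ...then swap in a smaller embedding matrix (ALBERT-style factorization).
            self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size)

Assigning a new module to an existing attribute simply replaces the registered submodule; nn.Module re-registers the new one under the same name.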
src/transformers/modeling_bert.py

@@ -160,7 +160,7 @@ class BertEmbeddings(nn.Module):
     """

     def __init__(self, config):
-        super(BertEmbeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
         self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

@@ -197,7 +197,7 @@ class BertEmbeddings(nn.Module):
 class BertSelfAttention(nn.Module):
     def __init__(self, config):
-        super(BertSelfAttention, self).__init__()
+        super().__init__()
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
                 "The hidden size (%d) is not a multiple of the number of attention "

@@ -275,7 +275,7 @@ class BertSelfAttention(nn.Module):
 class BertSelfOutput(nn.Module):
     def __init__(self, config):
-        super(BertSelfOutput, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -289,7 +289,7 @@ class BertSelfOutput(nn.Module):
 class BertAttention(nn.Module):
     def __init__(self, config):
-        super(BertAttention, self).__init__()
+        super().__init__()
         self.self = BertSelfAttention(config)
         self.output = BertSelfOutput(config)
         self.pruned_heads = set()

@@ -335,7 +335,7 @@ class BertAttention(nn.Module):
 class BertIntermediate(nn.Module):
     def __init__(self, config):
-        super(BertIntermediate, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
         if isinstance(config.hidden_act, str):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]

@@ -350,7 +350,7 @@ class BertIntermediate(nn.Module):
 class BertOutput(nn.Module):
     def __init__(self, config):
-        super(BertOutput, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
         self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -364,7 +364,7 @@ class BertOutput(nn.Module):
 class BertLayer(nn.Module):
     def __init__(self, config):
-        super(BertLayer, self).__init__()
+        super().__init__()
         self.attention = BertAttention(config)
         self.is_decoder = config.is_decoder
         if self.is_decoder:

@@ -399,7 +399,7 @@ class BertLayer(nn.Module):
 class BertEncoder(nn.Module):
     def __init__(self, config):
-        super(BertEncoder, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
         self.layer = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)])

@@ -440,7 +440,7 @@ class BertEncoder(nn.Module):
 class BertPooler(nn.Module):
     def __init__(self, config):
-        super(BertPooler, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()

@@ -455,7 +455,7 @@ class BertPooler(nn.Module):
 class BertPredictionHeadTransform(nn.Module):
     def __init__(self, config):
-        super(BertPredictionHeadTransform, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         if isinstance(config.hidden_act, str):
             self.transform_act_fn = ACT2FN[config.hidden_act]

@@ -472,7 +472,7 @@ class BertPredictionHeadTransform(nn.Module):
 class BertLMPredictionHead(nn.Module):
     def __init__(self, config):
-        super(BertLMPredictionHead, self).__init__()
+        super().__init__()
         self.transform = BertPredictionHeadTransform(config)
         # The output weights are the same as the input embeddings, but there is

@@ -492,7 +492,7 @@ class BertLMPredictionHead(nn.Module):
 class BertOnlyMLMHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyMLMHead, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config)

     def forward(self, sequence_output):

@@ -502,7 +502,7 @@ class BertOnlyMLMHead(nn.Module):
 class BertOnlyNSPHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyNSPHead, self).__init__()
+        super().__init__()
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

     def forward(self, pooled_output):

@@ -512,7 +512,7 @@ class BertOnlyNSPHead(nn.Module):
 class BertPreTrainingHeads(nn.Module):
     def __init__(self, config):
-        super(BertPreTrainingHeads, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

@@ -657,7 +657,7 @@ class BertModel(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertModel, self).__init__(config)
+        super().__init__(config)
         self.config = config
         self.embeddings = BertEmbeddings(config)

@@ -864,7 +864,7 @@ class BertForPreTraining(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForPreTraining, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertPreTrainingHeads(config)

@@ -954,7 +954,7 @@ class BertForMaskedLM(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)

@@ -1053,7 +1053,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForNextSentencePrediction, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertOnlyNSPHead(config)

@@ -1132,7 +1132,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)

@@ -1221,7 +1221,7 @@ class BertForMultipleChoice(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForMultipleChoice, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -1308,7 +1308,7 @@ class BertForTokenClassification(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)

@@ -1406,7 +1406,7 @@ class BertForQuestionAnswering(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)
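One detail worth noting across the BERT hunks: only the arguments to super itself disappear. Where the parent constructor takes arguments, as BertPreTrainedModel's does with config, they are still passed explicitly. A short sketch of both cases, using placeholder classes rather than the real ones:

    import torch.nn as nn

    class PreTrainedBase(nn.Module):  # stand-in for BertPreTrainedModel
        def __init__(self, config):
            super().__init__()
            self.config = config

    class Head(nn.Module):
        def __init__(self, config):
            super().__init__()            # was: super(Head, self).__init__()
            self.dense = nn.Linear(config.hidden_size, config.hidden_size)

    class Model(PreTrainedBase):
        def __init__(self, config):
            super().__init__(config)      # was: super(Model, self).__init__(config)
            self.head = Head(config)

Dropping config from the second call would change behaviour, not just syntax, so the diff keeps every constructor argument exactly as it was.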
src/transformers/modeling_ctrl.py

@@ -81,7 +81,7 @@ def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, head_mask=N
 class MultiHeadAttention(torch.nn.Module):
     def __init__(self, d_model_size, num_heads, output_attentions=False):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.output_attentions = output_attentions
         self.num_heads = num_heads
         self.d_model_size = d_model_size

@@ -132,7 +132,7 @@ def point_wise_feed_forward_network(d_model_size, dff):
 class EncoderLayer(torch.nn.Module):
     def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_attentions=False):
-        super(EncoderLayer, self).__init__()
+        super().__init__()
         self.multi_head_attention = MultiHeadAttention(d_model_size, num_heads, output_attentions)
         self.ffn = point_wise_feed_forward_network(d_model_size, dff)

@@ -274,7 +274,7 @@ class CTRLModel(CTRLPreTrainedModel):
     """

     def __init__(self, config):
-        super(CTRLModel, self).__init__(config)
+        super().__init__(config)
         self.output_hidden_states = config.output_hidden_states
         self.output_attentions = config.output_attentions
         self.output_past = config.output_past

@@ -481,7 +481,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
     """

     def __init__(self, config):
-        super(CTRLLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.transformer = CTRLModel(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=True)
src/transformers/modeling_distilbert.py

@@ -59,7 +59,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
 class Embeddings(nn.Module):
     def __init__(self, config):
-        super(Embeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.dim)
         if config.sinusoidal_pos_embds:

@@ -97,7 +97,7 @@ class Embeddings(nn.Module):
 class MultiHeadSelfAttention(nn.Module):
     def __init__(self, config):
-        super(MultiHeadSelfAttention, self).__init__()
+        super().__init__()
         self.n_heads = config.n_heads
         self.dim = config.dim

@@ -195,7 +195,7 @@ class MultiHeadSelfAttention(nn.Module):
 class FFN(nn.Module):
     def __init__(self, config):
-        super(FFN, self).__init__()
+        super().__init__()
         self.dropout = nn.Dropout(p=config.dropout)
         self.lin1 = nn.Linear(in_features=config.dim, out_features=config.hidden_dim)
         self.lin2 = nn.Linear(in_features=config.hidden_dim, out_features=config.dim)

@@ -214,7 +214,7 @@ class FFN(nn.Module):
 class TransformerBlock(nn.Module):
     def __init__(self, config):
-        super(TransformerBlock, self).__init__()
+        super().__init__()
         self.n_heads = config.n_heads
         self.dim = config.dim

@@ -266,7 +266,7 @@ class TransformerBlock(nn.Module):
 class Transformer(nn.Module):
     def __init__(self, config):
-        super(Transformer, self).__init__()
+        super().__init__()
         self.n_layers = config.n_layers
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -424,7 +424,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertModel, self).__init__(config)
+        super().__init__(config)
         self.embeddings = Embeddings(config)  # Embeddings
         self.transformer = Transformer(config)  # Encoder

@@ -525,7 +525,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -600,7 +600,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.distilbert = DistilBertModel(config)

@@ -679,7 +679,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)

@@ -766,7 +766,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.distilbert = DistilBertModel(config)
src/transformers/modeling_encoder_decoder.py

@@ -37,7 +37,7 @@ class PreTrainedEncoderDecoder(nn.Module):
     """

    def __init__(self, encoder, decoder):
-        super(PreTrainedEncoderDecoder, self).__init__()
+        super().__init__()
         self.encoder = encoder
         self.decoder = decoder

@@ -290,7 +290,7 @@ class Model2Model(PreTrainedEncoderDecoder):
     """

     def __init__(self, *args, **kwargs):
-        super(Model2Model, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.tie_weights()

     def tie_weights(self):

@@ -321,7 +321,7 @@ class Model2Model(PreTrainedEncoderDecoder):
         ):
             raise ValueError("Only the Bert model is currently supported.")
-        model = super(Model2Model, cls).from_pretrained(
+        model = super().from_pretrained(
             encoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
             decoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
             *args,

@@ -345,5 +345,5 @@ class Model2LSTM(PreTrainedEncoderDecoder):
                 " E.g. `decoder_config={'input_size': 768, 'hidden_size': 768, 'num_layers': 2}`"
             )
         kwargs["decoder_model"] = torch.nn.LSTM(kwargs.pop("decoder_config"))
-        model = super(Model2LSTM, cls).from_pretrained(*args, **kwargs)
+        model = super().from_pretrained(*args, **kwargs)
         return model
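The last two hunks are a slightly different case: super(Model2Model, cls).from_pretrained(...) is called inside a classmethod, with cls rather than self. The zero-argument form covers this too, since it binds to the first argument of the enclosing method whatever it is named. A hedged sketch with invented classes:

    class Base:
        @classmethod
        def from_pretrained(cls, name, **kwargs):
            print(f"loading {name} as {cls.__name__}")
            return cls()

    class Derived(Base):
        @classmethod
        def from_pretrained(cls, name, **kwargs):
            # Equivalent to super(Derived, cls).from_pretrained(name, **kwargs)
            return super().from_pretrained(name, **kwargs)

    Derived.from_pretrained("toy-checkpoint")  # prints: loading toy-checkpoint as Derived

Because the bound argument is cls, the parent implementation can still construct the subclass, which is the usual reason from_pretrained is written as a classmethod.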