chenpangpang / transformers · Commits

Commit dc17f2a1 (unverified), authored Jan 16, 2020 by Thomas Wolf, committed via GitHub on Jan 16, 2020
Parents: 88085484, a98b2ca8

Merge pull request #2538 from huggingface/py3_super

💄 super

Changes: 75 files in total; this page shows 20 changed files with 71 additions and 71 deletions (+71 / -71).
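The entire diff applies one mechanical change: Python 2-compatible super(ClassName, self) calls are rewritten to the zero-argument form available in Python 3. A minimal sketch of the before/after (hypothetical Head class, not taken from the repository); in Python 3 both spellings dispatch to the same nn.Module.__init__ via the MRO:

    from torch import nn

    class Head(nn.Module):
        def __init__(self, hidden_size, num_labels):
            # Old spelling, still valid in Python 3 but redundant:
            #   super(Head, self).__init__()
            # New spelling applied throughout this commit:
            super().__init__()
            self.dense = nn.Linear(hidden_size, num_labels)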
Files changed on this page:

  examples/mm-imdb/utils_mmimdb.py                   +1   -1
  examples/pplm/pplm_classification_head.py          +1   -1
  examples/pplm/run_pplm_discrim_train.py            +1   -1
  examples/summarization/configuration_bertabs.py    +1   -1
  examples/summarization/modeling_bertabs.py         +7   -7
  src/transformers/configuration_albert.py           +1   -1
  src/transformers/configuration_bert.py             +1   -1
  src/transformers/configuration_ctrl.py             +1   -1
  src/transformers/configuration_distilbert.py       +1   -1
  src/transformers/configuration_gpt2.py             +1   -1
  src/transformers/configuration_openai.py           +1   -1
  src/transformers/configuration_t5.py               +1   -1
  src/transformers/configuration_transfo_xl.py       +1   -1
  src/transformers/configuration_xlm.py              +1   -1
  src/transformers/configuration_xlnet.py            +1   -1
  src/transformers/modeling_albert.py                +10  -10
  src/transformers/modeling_bert.py                  +22  -22
  src/transformers/modeling_ctrl.py                  +4   -4
  src/transformers/modeling_distilbert.py            +10  -10
  src/transformers/modeling_encoder_decoder.py       +4   -4
examples/mm-imdb/utils_mmimdb.py
@@ -31,7 +31,7 @@ POOLING_BREAKDOWN = {1: (1, 1), 2: (2, 1), 3: (3, 1), 4: (2, 2), 5: (5, 1), 6: (
 class ImageEncoder(nn.Module):
     def __init__(self, args):
-        super(ImageEncoder, self).__init__()
+        super().__init__()
         model = torchvision.models.resnet152(pretrained=True)
         modules = list(model.children())[:-2]
         self.model = nn.Sequential(*modules)
examples/pplm/pplm_classification_head.py
@@ -5,7 +5,7 @@ class ClassificationHead(torch.nn.Module):
     """Classification Head for transformer encoders"""
     def __init__(self, class_size, embed_size):
-        super(ClassificationHead, self).__init__()
+        super().__init__()
         self.class_size = class_size
         self.embed_size = embed_size
         # self.mlp1 = torch.nn.Linear(embed_size, embed_size)
examples/pplm/run_pplm_discrim_train.py
@@ -46,7 +46,7 @@ class Discriminator(torch.nn.Module):
     """Transformer encoder followed by a Classification Head"""
     def __init__(self, class_size, pretrained_model="gpt2-medium", cached_mode=False, device="cpu"):
-        super(Discriminator, self).__init__()
+        super().__init__()
         self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
         self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
         self.embed_size = self.encoder.transformer.config.hidden_size
examples/summarization/configuration_bertabs.py
@@ -80,7 +80,7 @@ class BertAbsConfig(PretrainedConfig):
         dec_dropout=0.2,
         **kwargs,
     ):
-        super(BertAbsConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.max_pos = max_pos
examples/summarization/modeling_bertabs.py
@@ -47,7 +47,7 @@ class BertAbsPreTrainedModel(PreTrainedModel):
 class BertAbs(BertAbsPreTrainedModel):
     def __init__(self, args, checkpoint=None, bert_extractive_checkpoint=None):
-        super(BertAbs, self).__init__(args)
+        super().__init__(args)
         self.args = args
         self.bert = Bert()

@@ -122,7 +122,7 @@ class Bert(nn.Module):
     """
     def __init__(self):
-        super(Bert, self).__init__()
+        super().__init__()
         config = BertConfig.from_pretrained("bert-base-uncased")
         self.model = BertModel(config)

@@ -151,7 +151,7 @@ class TransformerDecoder(nn.Module):
     """
     def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, vocab_size):
-        super(TransformerDecoder, self).__init__()
+        super().__init__()
         # Basic attributes.
         self.decoder_type = "transformer"

@@ -261,7 +261,7 @@ class PositionalEncoding(nn.Module):
         pe[:, 0::2] = torch.sin(position.float() * div_term)
         pe[:, 1::2] = torch.cos(position.float() * div_term)
         pe = pe.unsqueeze(0)
-        super(PositionalEncoding, self).__init__()
+        super().__init__()
         self.register_buffer("pe", pe)
         self.dropout = nn.Dropout(p=dropout)
         self.dim = dim

@@ -293,7 +293,7 @@ class TransformerDecoderLayer(nn.Module):
     """
     def __init__(self, d_model, heads, d_ff, dropout):
-        super(TransformerDecoderLayer, self).__init__()
+        super().__init__()
         self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)

@@ -410,7 +410,7 @@ class MultiHeadedAttention(nn.Module):
         self.dim_per_head = model_dim // head_count
         self.model_dim = model_dim
-        super(MultiHeadedAttention, self).__init__()
+        super().__init__()
         self.head_count = head_count
         self.linear_keys = nn.Linear(model_dim, head_count * self.dim_per_head)

@@ -639,7 +639,7 @@ class PositionwiseFeedForward(nn.Module):
     """
     def __init__(self, d_model, d_ff, dropout=0.1):
-        super(PositionwiseFeedForward, self).__init__()
+        super().__init__()
         self.w_1 = nn.Linear(d_model, d_ff)
         self.w_2 = nn.Linear(d_ff, d_model)
         self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
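One hunk above stands out: PositionalEncoding calls super() partway through __init__ rather than on the first line. The zero-argument form still works there, since the compiler binds it to the enclosing class. A minimal sketch (hypothetical PositionalEncodingSketch, not the bertabs implementation, assuming an even dim):

    import math
    import torch
    from torch import nn

    class PositionalEncodingSketch(nn.Module):
        def __init__(self, dropout, dim, max_len=5000):
            # Plain tensors can be built before the nn.Module machinery exists...
            position = torch.arange(max_len).unsqueeze(1).float()
            div_term = torch.exp(torch.arange(0, dim, 2).float() * -(math.log(10000.0) / dim))
            pe = torch.zeros(max_len, dim)
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            pe = pe.unsqueeze(0)
            # ...but super().__init__() must still run before register_buffer or any
            # submodule assignment, exactly like the two-argument form it replaces.
            super().__init__()
            self.register_buffer("pe", pe)
            self.dropout = nn.Dropout(p=dropout)
            self.dim = dim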
src/transformers/configuration_albert.py
@@ -122,7 +122,7 @@ class AlbertConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         **kwargs
     ):
-        super(AlbertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.embedding_size = embedding_size
src/transformers/configuration_bert.py
@@ -125,7 +125,7 @@ class BertConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         **kwargs
     ):
-        super(BertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
src/transformers/configuration_ctrl.py
@@ -106,7 +106,7 @@ class CTRLConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(CTRLConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
src/transformers/configuration_distilbert.py
@@ -113,7 +113,7 @@ class DistilBertConfig(PretrainedConfig):
         seq_classif_dropout=0.2,
         **kwargs
     ):
-        super(DistilBertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.sinusoidal_pos_embds = sinusoidal_pos_embds
src/transformers/configuration_gpt2.py
@@ -136,7 +136,7 @@ class GPT2Config(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(GPT2Config, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
src/transformers/configuration_openai.py
@@ -138,7 +138,7 @@ class OpenAIGPTConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(OpenAIGPTConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
src/transformers/configuration_t5.py
@@ -77,7 +77,7 @@ class T5Config(PretrainedConfig):
         initializer_factor=1.0,
         **kwargs
     ):
-        super(T5Config, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_positions = n_positions
         self.d_model = d_model
src/transformers/configuration_transfo_xl.py
@@ -151,7 +151,7 @@ class TransfoXLConfig(PretrainedConfig):
         layer_norm_epsilon=1e-5,
         **kwargs
     ):
-        super(TransfoXLConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.cutoffs = []
src/transformers/configuration_xlm.py
@@ -197,7 +197,7 @@ class XLMConfig(PretrainedConfig):
     ):
         """Constructs XLMConfig.
         """
-        super(XLMConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.emb_dim = emb_dim
         self.n_layers = n_layers
src/transformers/configuration_xlnet.py
@@ -159,7 +159,7 @@ class XLNetConfig(PretrainedConfig):
     ):
         """Constructs XLNetConfig.
         """
-        super(XLNetConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.d_model = d_model
         self.n_layer = n_layer
src/transformers/modeling_albert.py
@@ -167,7 +167,7 @@ class AlbertEmbeddings(BertEmbeddings):
     """
     def __init__(self, config):
-        super(AlbertEmbeddings, self).__init__(config)
+        super().__init__(config)
         self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size)

@@ -177,7 +177,7 @@ class AlbertEmbeddings(BertEmbeddings):
 class AlbertAttention(BertSelfAttention):
     def __init__(self, config):
-        super(AlbertAttention, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.num_attention_heads = config.num_attention_heads

@@ -258,7 +258,7 @@ class AlbertAttention(BertSelfAttention):
 class AlbertLayer(nn.Module):
     def __init__(self, config):
-        super(AlbertLayer, self).__init__()
+        super().__init__()
         self.config = config
         self.full_layer_layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

@@ -279,7 +279,7 @@ class AlbertLayer(nn.Module):
 class AlbertLayerGroup(nn.Module):
     def __init__(self, config):
-        super(AlbertLayerGroup, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -309,7 +309,7 @@ class AlbertLayerGroup(nn.Module):
 class AlbertTransformer(nn.Module):
     def __init__(self, config):
-        super(AlbertTransformer, self).__init__()
+        super().__init__()
         self.config = config
         self.output_attentions = config.output_attentions

@@ -471,7 +471,7 @@ class AlbertModel(AlbertPreTrainedModel):
     base_model_prefix = "albert"
     def __init__(self, config):
-        super(AlbertModel, self).__init__(config)
+        super().__init__(config)
         self.config = config
         self.embeddings = AlbertEmbeddings(config)

@@ -571,7 +571,7 @@ class AlbertModel(AlbertPreTrainedModel):
 class AlbertMLMHead(nn.Module):
     def __init__(self, config):
-        super(AlbertMLMHead, self).__init__()
+        super().__init__()
         self.LayerNorm = nn.LayerNorm(config.embedding_size)
         self.bias = nn.Parameter(torch.zeros(config.vocab_size))

@@ -619,7 +619,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
     """
     def __init__(self, config):
-        super(AlbertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.albert = AlbertModel(config)
         self.predictions = AlbertMLMHead(config)

@@ -706,7 +706,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
     """
     def __init__(self, config):
-        super(AlbertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.albert = AlbertModel(config)

@@ -804,7 +804,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
     """
     def __init__(self, config):
-        super(AlbertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.albert = AlbertModel(config)
src/transformers/modeling_bert.py
@@ -160,7 +160,7 @@ class BertEmbeddings(nn.Module):
     """
     def __init__(self, config):
-        super(BertEmbeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
         self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

@@ -197,7 +197,7 @@ class BertEmbeddings(nn.Module):
 class BertSelfAttention(nn.Module):
     def __init__(self, config):
-        super(BertSelfAttention, self).__init__()
+        super().__init__()
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
                 "The hidden size (%d) is not a multiple of the number of attention "

@@ -275,7 +275,7 @@ class BertSelfAttention(nn.Module):
 class BertSelfOutput(nn.Module):
     def __init__(self, config):
-        super(BertSelfOutput, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -289,7 +289,7 @@ class BertSelfOutput(nn.Module):
 class BertAttention(nn.Module):
     def __init__(self, config):
-        super(BertAttention, self).__init__()
+        super().__init__()
         self.self = BertSelfAttention(config)
         self.output = BertSelfOutput(config)
         self.pruned_heads = set()

@@ -335,7 +335,7 @@ class BertAttention(nn.Module):
 class BertIntermediate(nn.Module):
     def __init__(self, config):
-        super(BertIntermediate, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
         if isinstance(config.hidden_act, str):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]

@@ -350,7 +350,7 @@ class BertIntermediate(nn.Module):
 class BertOutput(nn.Module):
     def __init__(self, config):
-        super(BertOutput, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
         self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -364,7 +364,7 @@ class BertOutput(nn.Module):
 class BertLayer(nn.Module):
     def __init__(self, config):
-        super(BertLayer, self).__init__()
+        super().__init__()
         self.attention = BertAttention(config)
         self.is_decoder = config.is_decoder
         if self.is_decoder:

@@ -399,7 +399,7 @@ class BertLayer(nn.Module):
 class BertEncoder(nn.Module):
     def __init__(self, config):
-        super(BertEncoder, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
         self.layer = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)])

@@ -440,7 +440,7 @@ class BertEncoder(nn.Module):
 class BertPooler(nn.Module):
     def __init__(self, config):
-        super(BertPooler, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()

@@ -455,7 +455,7 @@ class BertPooler(nn.Module):
 class BertPredictionHeadTransform(nn.Module):
     def __init__(self, config):
-        super(BertPredictionHeadTransform, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         if isinstance(config.hidden_act, str):
             self.transform_act_fn = ACT2FN[config.hidden_act]

@@ -472,7 +472,7 @@ class BertPredictionHeadTransform(nn.Module):
 class BertLMPredictionHead(nn.Module):
     def __init__(self, config):
-        super(BertLMPredictionHead, self).__init__()
+        super().__init__()
         self.transform = BertPredictionHeadTransform(config)
         # The output weights are the same as the input embeddings, but there is

@@ -492,7 +492,7 @@ class BertLMPredictionHead(nn.Module):
 class BertOnlyMLMHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyMLMHead, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config)
     def forward(self, sequence_output):

@@ -502,7 +502,7 @@ class BertOnlyMLMHead(nn.Module):
 class BertOnlyNSPHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyNSPHead, self).__init__()
+        super().__init__()
         self.seq_relationship = nn.Linear(config.hidden_size, 2)
     def forward(self, pooled_output):

@@ -512,7 +512,7 @@ class BertOnlyNSPHead(nn.Module):
 class BertPreTrainingHeads(nn.Module):
     def __init__(self, config):
-        super(BertPreTrainingHeads, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

@@ -657,7 +657,7 @@ class BertModel(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertModel, self).__init__(config)
+        super().__init__(config)
         self.config = config
         self.embeddings = BertEmbeddings(config)

@@ -864,7 +864,7 @@ class BertForPreTraining(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertForPreTraining, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertPreTrainingHeads(config)

@@ -954,7 +954,7 @@ class BertForMaskedLM(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)

@@ -1053,7 +1053,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
     """
    def __init__(self, config):
-        super(BertForNextSentencePrediction, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertOnlyNSPHead(config)

@@ -1132,7 +1132,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)

@@ -1221,7 +1221,7 @@ class BertForMultipleChoice(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertForMultipleChoice, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -1308,7 +1308,7 @@ class BertForTokenClassification(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)

@@ -1406,7 +1406,7 @@ class BertForQuestionAnswering(BertPreTrainedModel):
     """
     def __init__(self, config):
-        super(BertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)
src/transformers/modeling_ctrl.py
@@ -81,7 +81,7 @@ def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, head_mask=N
 class MultiHeadAttention(torch.nn.Module):
     def __init__(self, d_model_size, num_heads, output_attentions=False):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.output_attentions = output_attentions
         self.num_heads = num_heads
         self.d_model_size = d_model_size

@@ -132,7 +132,7 @@ def point_wise_feed_forward_network(d_model_size, dff):
 class EncoderLayer(torch.nn.Module):
     def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_attentions=False):
-        super(EncoderLayer, self).__init__()
+        super().__init__()
         self.multi_head_attention = MultiHeadAttention(d_model_size, num_heads, output_attentions)
         self.ffn = point_wise_feed_forward_network(d_model_size, dff)

@@ -274,7 +274,7 @@ class CTRLModel(CTRLPreTrainedModel):
     """
     def __init__(self, config):
-        super(CTRLModel, self).__init__(config)
+        super().__init__(config)
         self.output_hidden_states = config.output_hidden_states
         self.output_attentions = config.output_attentions
         self.output_past = config.output_past

@@ -481,7 +481,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
     """
     def __init__(self, config):
-        super(CTRLLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.transformer = CTRLModel(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=True)
src/transformers/modeling_distilbert.py
@@ -59,7 +59,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
 class Embeddings(nn.Module):
     def __init__(self, config):
-        super(Embeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.dim)
         if config.sinusoidal_pos_embds:

@@ -97,7 +97,7 @@ class Embeddings(nn.Module):
 class MultiHeadSelfAttention(nn.Module):
     def __init__(self, config):
-        super(MultiHeadSelfAttention, self).__init__()
+        super().__init__()
         self.n_heads = config.n_heads
         self.dim = config.dim

@@ -195,7 +195,7 @@ class MultiHeadSelfAttention(nn.Module):
 class FFN(nn.Module):
     def __init__(self, config):
-        super(FFN, self).__init__()
+        super().__init__()
         self.dropout = nn.Dropout(p=config.dropout)
         self.lin1 = nn.Linear(in_features=config.dim, out_features=config.hidden_dim)
         self.lin2 = nn.Linear(in_features=config.hidden_dim, out_features=config.dim)

@@ -214,7 +214,7 @@ class FFN(nn.Module):
 class TransformerBlock(nn.Module):
     def __init__(self, config):
-        super(TransformerBlock, self).__init__()
+        super().__init__()
         self.n_heads = config.n_heads
         self.dim = config.dim

@@ -266,7 +266,7 @@ class TransformerBlock(nn.Module):
 class Transformer(nn.Module):
     def __init__(self, config):
-        super(Transformer, self).__init__()
+        super().__init__()
         self.n_layers = config.n_layers
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -424,7 +424,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
     """
     def __init__(self, config):
-        super(DistilBertModel, self).__init__(config)
+        super().__init__(config)
         self.embeddings = Embeddings(config)  # Embeddings
         self.transformer = Transformer(config)  # Encoder

@@ -525,7 +525,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
     """
     def __init__(self, config):
-        super(DistilBertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -600,7 +600,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
     """
     def __init__(self, config):
-        super(DistilBertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.distilbert = DistilBertModel(config)

@@ -679,7 +679,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
     """
     def __init__(self, config):
-        super(DistilBertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)

@@ -766,7 +766,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
     """
     def __init__(self, config):
-        super(DistilBertForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.distilbert = DistilBertModel(config)
src/transformers/modeling_encoder_decoder.py
@@ -37,7 +37,7 @@ class PreTrainedEncoderDecoder(nn.Module):
     """
     def __init__(self, encoder, decoder):
-        super(PreTrainedEncoderDecoder, self).__init__()
+        super().__init__()
         self.encoder = encoder
         self.decoder = decoder

@@ -290,7 +290,7 @@ class Model2Model(PreTrainedEncoderDecoder):
     """
     def __init__(self, *args, **kwargs):
-        super(Model2Model, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.tie_weights()
     def tie_weights(self):

@@ -321,7 +321,7 @@ class Model2Model(PreTrainedEncoderDecoder):
         ):
             raise ValueError("Only the Bert model is currently supported.")
-        model = super(Model2Model, cls).from_pretrained(
+        model = super().from_pretrained(
             encoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
             decoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
             *args,

@@ -345,5 +345,5 @@ class Model2LSTM(PreTrainedEncoderDecoder):
                 " E.g. `decoder_config={'input_size': 768, 'hidden_size': 768, 'num_layers': 2}`"
             )
         kwargs["decoder_model"] = torch.nn.LSTM(kwargs.pop("decoder_config"))
-        model = super(Model2LSTM, cls).from_pretrained(*args, **kwargs)
+        model = super().from_pretrained(*args, **kwargs)
         return model
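The last two hunks are the classmethod variant of the same change: super(Model2Model, cls) and super(Model2LSTM, cls) become a bare super() inside from_pretrained. A minimal sketch with hypothetical Base/Derived classes (not from the repository) showing that the zero-argument form also resolves cls correctly in a classmethod:

    class Base:
        @classmethod
        def from_pretrained(cls, name):
            return f"{cls.__name__} loaded from {name}"

    class Derived(Base):
        @classmethod
        def from_pretrained(cls, name):
            # Equivalent to super(Derived, cls).from_pretrained(name)
            return super().from_pretrained(name) + " (via Derived)"

    print(Derived.from_pretrained("x"))  # -> "Derived loaded from x (via Derived)"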