chenpangpang / transformers · Commit dc17f2a1 (unverified)

Authored Jan 16, 2020 by Thomas Wolf; committed by GitHub, Jan 16, 2020

Merge pull request #2538 from huggingface/py3_super

💄 super

Parents: 88085484, a98b2ca8
Changes: 75
Showing 20 changed files with 71 additions and 71 deletions (page 1 of 4).
examples/mm-imdb/utils_mmimdb.py (+1, -1)
examples/pplm/pplm_classification_head.py (+1, -1)
examples/pplm/run_pplm_discrim_train.py (+1, -1)
examples/summarization/configuration_bertabs.py (+1, -1)
examples/summarization/modeling_bertabs.py (+7, -7)
src/transformers/configuration_albert.py (+1, -1)
src/transformers/configuration_bert.py (+1, -1)
src/transformers/configuration_ctrl.py (+1, -1)
src/transformers/configuration_distilbert.py (+1, -1)
src/transformers/configuration_gpt2.py (+1, -1)
src/transformers/configuration_openai.py (+1, -1)
src/transformers/configuration_t5.py (+1, -1)
src/transformers/configuration_transfo_xl.py (+1, -1)
src/transformers/configuration_xlm.py (+1, -1)
src/transformers/configuration_xlnet.py (+1, -1)
src/transformers/modeling_albert.py (+10, -10)
src/transformers/modeling_bert.py (+22, -22)
src/transformers/modeling_ctrl.py (+4, -4)
src/transformers/modeling_distilbert.py (+10, -10)
src/transformers/modeling_encoder_decoder.py (+4, -4)
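Every hunk in this commit applies the same mechanical change: the Python 2-compatible call `super(ClassName, self).__init__(...)` becomes the zero-argument `super().__init__(...)` that Python 3 supports. A minimal before/after sketch (the class names here are illustrative, not taken from the diff):

    import torch.nn as nn

    # Python 2-compatible form: the class name must be repeated
    class OldStyle(nn.Module):
        def __init__(self):
            super(OldStyle, self).__init__()

    # Python 3 zero-argument form: the compiler supplies __class__ and self
    class NewStyle(nn.Module):
        def __init__(self):
            super().__init__()

Both forms resolve to the same method through the MRO; the zero-argument form simply avoids restating the class name, which keeps the call correct when a class is renamed or code is copied between classes.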
examples/mm-imdb/utils_mmimdb.py

@@ -31,7 +31,7 @@ POOLING_BREAKDOWN = {1: (1, 1), 2: (2, 1), 3: (3, 1), 4: (2, 2), 5: (5, 1), 6: (
 class ImageEncoder(nn.Module):
     def __init__(self, args):
-        super(ImageEncoder, self).__init__()
+        super().__init__()
         model = torchvision.models.resnet152(pretrained=True)
         modules = list(model.children())[:-2]
         self.model = nn.Sequential(*modules)
examples/pplm/pplm_classification_head.py

@@ -5,7 +5,7 @@ class ClassificationHead(torch.nn.Module):
     """Classification Head for transformer encoders"""

     def __init__(self, class_size, embed_size):
-        super(ClassificationHead, self).__init__()
+        super().__init__()
         self.class_size = class_size
         self.embed_size = embed_size
         # self.mlp1 = torch.nn.Linear(embed_size, embed_size)
examples/pplm/run_pplm_discrim_train.py

@@ -46,7 +46,7 @@ class Discriminator(torch.nn.Module):
     """Transformer encoder followed by a Classification Head"""

     def __init__(self, class_size, pretrained_model="gpt2-medium", cached_mode=False, device="cpu"):
-        super(Discriminator, self).__init__()
+        super().__init__()
         self.tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model)
         self.encoder = GPT2LMHeadModel.from_pretrained(pretrained_model)
         self.embed_size = self.encoder.transformer.config.hidden_size
examples/summarization/configuration_bertabs.py

@@ -80,7 +80,7 @@ class BertAbsConfig(PretrainedConfig):
         dec_dropout=0.2,
         **kwargs,
     ):
-        super(BertAbsConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.max_pos = max_pos
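All of the configuration classes in this commit share one pattern: options the subclass does not consume are forwarded to the `PretrainedConfig` base via `super().__init__(**kwargs)`. A sketch of that forwarding with a simplified stand-in for the base class (the names `BaseConfig`/`MyConfig` and the attributes are illustrative, not the library's API):

    class BaseConfig:
        def __init__(self, **kwargs):
            # the base class consumes options shared by all configs
            self.output_attentions = kwargs.pop("output_attentions", False)

    class MyConfig(BaseConfig):
        def __init__(self, vocab_size=30522, **kwargs):
            super().__init__(**kwargs)  # pass everything else up unchanged
            self.vocab_size = vocab_size

    cfg = MyConfig(vocab_size=1000, output_attentions=True)
    assert cfg.vocab_size == 1000 and cfg.output_attentions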
examples/summarization/modeling_bertabs.py

@@ -47,7 +47,7 @@ class BertAbsPreTrainedModel(PreTrainedModel):
 class BertAbs(BertAbsPreTrainedModel):
     def __init__(self, args, checkpoint=None, bert_extractive_checkpoint=None):
-        super(BertAbs, self).__init__(args)
+        super().__init__(args)
         self.args = args
         self.bert = Bert()

@@ -122,7 +122,7 @@ class Bert(nn.Module):
     """

     def __init__(self):
-        super(Bert, self).__init__()
+        super().__init__()
         config = BertConfig.from_pretrained("bert-base-uncased")
         self.model = BertModel(config)

@@ -151,7 +151,7 @@ class TransformerDecoder(nn.Module):
     """

     def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, vocab_size):
-        super(TransformerDecoder, self).__init__()
+        super().__init__()
         # Basic attributes.
         self.decoder_type = "transformer"

@@ -261,7 +261,7 @@ class PositionalEncoding(nn.Module):
         pe[:, 0::2] = torch.sin(position.float() * div_term)
         pe[:, 1::2] = torch.cos(position.float() * div_term)
         pe = pe.unsqueeze(0)
-        super(PositionalEncoding, self).__init__()
+        super().__init__()
         self.register_buffer("pe", pe)
         self.dropout = nn.Dropout(p=dropout)
         self.dim = dim

@@ -293,7 +293,7 @@ class TransformerDecoderLayer(nn.Module):
     """

     def __init__(self, d_model, heads, d_ff, dropout):
-        super(TransformerDecoderLayer, self).__init__()
+        super().__init__()
         self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)

@@ -410,7 +410,7 @@ class MultiHeadedAttention(nn.Module):
         self.dim_per_head = model_dim // head_count
         self.model_dim = model_dim
-        super(MultiHeadedAttention, self).__init__()
+        super().__init__()
         self.head_count = head_count
         self.linear_keys = nn.Linear(model_dim, head_count * self.dim_per_head)

@@ -639,7 +639,7 @@ class PositionwiseFeedForward(nn.Module):
     """

     def __init__(self, d_model, d_ff, dropout=0.1):
-        super(PositionwiseFeedForward, self).__init__()
+        super().__init__()
         self.w_1 = nn.Linear(d_model, d_ff)
         self.w_2 = nn.Linear(d_ff, d_model)
         self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
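One hunk above is worth a second look: in `PositionalEncoding`, the `pe` tensor is computed before `super().__init__()` runs, but `register_buffer` is only called afterwards. That ordering matters because `nn.Module.__init__` creates the internal registries (`_buffers`, `_parameters`, `_modules`) that `register_buffer` writes into. A minimal sketch of the constraint (an illustrative module, not the code from the diff):

    import torch
    import torch.nn as nn

    class WithBuffer(nn.Module):
        def __init__(self, n):
            pe = torch.zeros(n)    # a plain local tensor; fine before init
            super().__init__()     # sets up _buffers and friends
            # moving this above super().__init__() raises
            # AttributeError: cannot assign buffer before Module.__init__() call
            self.register_buffer("pe", pe)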
src/transformers/configuration_albert.py

@@ -122,7 +122,7 @@ class AlbertConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         **kwargs
     ):
-        super(AlbertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.embedding_size = embedding_size
src/transformers/configuration_bert.py

@@ -125,7 +125,7 @@ class BertConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         **kwargs
     ):
-        super(BertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
src/transformers/configuration_ctrl.py

@@ -106,7 +106,7 @@ class CTRLConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(CTRLConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
src/transformers/configuration_distilbert.py

@@ -113,7 +113,7 @@ class DistilBertConfig(PretrainedConfig):
         seq_classif_dropout=0.2,
         **kwargs
     ):
-        super(DistilBertConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.max_position_embeddings = max_position_embeddings
         self.sinusoidal_pos_embds = sinusoidal_pos_embds
src/transformers/configuration_gpt2.py

@@ -136,7 +136,7 @@ class GPT2Config(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(GPT2Config, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
src/transformers/configuration_openai.py

@@ -138,7 +138,7 @@ class OpenAIGPTConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(OpenAIGPTConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
src/transformers/configuration_t5.py

@@ -77,7 +77,7 @@ class T5Config(PretrainedConfig):
         initializer_factor=1.0,
         **kwargs
     ):
-        super(T5Config, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_positions = n_positions
         self.d_model = d_model
src/transformers/configuration_transfo_xl.py

@@ -151,7 +151,7 @@ class TransfoXLConfig(PretrainedConfig):
         layer_norm_epsilon=1e-5,
         **kwargs
     ):
-        super(TransfoXLConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.cutoffs = []
src/transformers/configuration_xlm.py

@@ -197,7 +197,7 @@ class XLMConfig(PretrainedConfig):
     ):
         """Constructs XLMConfig.
         """
-        super(XLMConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.emb_dim = emb_dim
         self.n_layers = n_layers
src/transformers/configuration_xlnet.py

@@ -159,7 +159,7 @@ class XLNetConfig(PretrainedConfig):
     ):
         """Constructs XLNetConfig.
         """
-        super(XLNetConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.d_model = d_model
         self.n_layer = n_layer
src/transformers/modeling_albert.py

@@ -167,7 +167,7 @@ class AlbertEmbeddings(BertEmbeddings):
     """

     def __init__(self, config):
-        super(AlbertEmbeddings, self).__init__(config)
+        super().__init__(config)
         self.word_embeddings = nn.Embedding(config.vocab_size, config.embedding_size, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.embedding_size)

@@ -177,7 +177,7 @@ class AlbertEmbeddings(BertEmbeddings):
 class AlbertAttention(BertSelfAttention):
     def __init__(self, config):
-        super(AlbertAttention, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.num_attention_heads = config.num_attention_heads

@@ -258,7 +258,7 @@ class AlbertAttention(BertSelfAttention):
 class AlbertLayer(nn.Module):
     def __init__(self, config):
-        super(AlbertLayer, self).__init__()
+        super().__init__()
         self.config = config
         self.full_layer_layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

@@ -279,7 +279,7 @@ class AlbertLayer(nn.Module):
 class AlbertLayerGroup(nn.Module):
     def __init__(self, config):
-        super(AlbertLayerGroup, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -309,7 +309,7 @@ class AlbertLayerGroup(nn.Module):
 class AlbertTransformer(nn.Module):
     def __init__(self, config):
-        super(AlbertTransformer, self).__init__()
+        super().__init__()
         self.config = config
         self.output_attentions = config.output_attentions

@@ -471,7 +471,7 @@ class AlbertModel(AlbertPreTrainedModel):
     base_model_prefix = "albert"

     def __init__(self, config):
-        super(AlbertModel, self).__init__(config)
+        super().__init__(config)
         self.config = config
         self.embeddings = AlbertEmbeddings(config)

@@ -571,7 +571,7 @@ class AlbertModel(AlbertPreTrainedModel):
 class AlbertMLMHead(nn.Module):
     def __init__(self, config):
-        super(AlbertMLMHead, self).__init__()
+        super().__init__()
         self.LayerNorm = nn.LayerNorm(config.embedding_size)
         self.bias = nn.Parameter(torch.zeros(config.vocab_size))

@@ -619,7 +619,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
     """

     def __init__(self, config):
-        super(AlbertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.albert = AlbertModel(config)
         self.predictions = AlbertMLMHead(config)

@@ -706,7 +706,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
     """

     def __init__(self, config):
-        super(AlbertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.albert = AlbertModel(config)

@@ -804,7 +804,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
     """

     def __init__(self, config):
-        super(AlbertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.albert = AlbertModel(config)
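Unlike most modules in this commit, several ALBERT classes subclass concrete parents (`AlbertEmbeddings(BertEmbeddings)`, `AlbertAttention(BertSelfAttention)`), so here the zero-argument `super().__init__(config)` runs the parent's full constructor and the child then overrides selected submodules. A simplified sketch of that pattern (illustrative classes, not the diff's):

    import torch.nn as nn

    class ParentEmbeddings(nn.Module):
        def __init__(self, size):
            super().__init__()
            self.word_embeddings = nn.Embedding(100, size)

    class ChildEmbeddings(ParentEmbeddings):
        def __init__(self, size):
            super().__init__(size)  # resolves to ParentEmbeddings.__init__
            # then replace one submodule, much as AlbertEmbeddings swaps in
            # embedding_size-shaped tables after calling its parent
            self.word_embeddings = nn.Embedding(100, size, padding_idx=0)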
src/transformers/modeling_bert.py

@@ -160,7 +160,7 @@ class BertEmbeddings(nn.Module):
     """

     def __init__(self, config):
-        super(BertEmbeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
         self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

@@ -197,7 +197,7 @@ class BertEmbeddings(nn.Module):
 class BertSelfAttention(nn.Module):
     def __init__(self, config):
-        super(BertSelfAttention, self).__init__()
+        super().__init__()
         if config.hidden_size % config.num_attention_heads != 0:
             raise ValueError(
                 "The hidden size (%d) is not a multiple of the number of attention "

@@ -275,7 +275,7 @@ class BertSelfAttention(nn.Module):
 class BertSelfOutput(nn.Module):
     def __init__(self, config):
-        super(BertSelfOutput, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -289,7 +289,7 @@ class BertSelfOutput(nn.Module):
 class BertAttention(nn.Module):
     def __init__(self, config):
-        super(BertAttention, self).__init__()
+        super().__init__()
         self.self = BertSelfAttention(config)
         self.output = BertSelfOutput(config)
         self.pruned_heads = set()

@@ -335,7 +335,7 @@ class BertAttention(nn.Module):
 class BertIntermediate(nn.Module):
     def __init__(self, config):
-        super(BertIntermediate, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
         if isinstance(config.hidden_act, str):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]

@@ -350,7 +350,7 @@ class BertIntermediate(nn.Module):
 class BertOutput(nn.Module):
     def __init__(self, config):
-        super(BertOutput, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
         self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -364,7 +364,7 @@ class BertOutput(nn.Module):
 class BertLayer(nn.Module):
     def __init__(self, config):
-        super(BertLayer, self).__init__()
+        super().__init__()
         self.attention = BertAttention(config)
         self.is_decoder = config.is_decoder
         if self.is_decoder:

@@ -399,7 +399,7 @@ class BertLayer(nn.Module):
 class BertEncoder(nn.Module):
     def __init__(self, config):
-        super(BertEncoder, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
         self.layer = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)])

@@ -440,7 +440,7 @@ class BertEncoder(nn.Module):
 class BertPooler(nn.Module):
     def __init__(self, config):
-        super(BertPooler, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.activation = nn.Tanh()

@@ -455,7 +455,7 @@ class BertPooler(nn.Module):
 class BertPredictionHeadTransform(nn.Module):
     def __init__(self, config):
-        super(BertPredictionHeadTransform, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         if isinstance(config.hidden_act, str):
             self.transform_act_fn = ACT2FN[config.hidden_act]

@@ -472,7 +472,7 @@ class BertPredictionHeadTransform(nn.Module):
 class BertLMPredictionHead(nn.Module):
     def __init__(self, config):
-        super(BertLMPredictionHead, self).__init__()
+        super().__init__()
         self.transform = BertPredictionHeadTransform(config)

         # The output weights are the same as the input embeddings, but there is

@@ -492,7 +492,7 @@ class BertLMPredictionHead(nn.Module):
 class BertOnlyMLMHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyMLMHead, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config)

     def forward(self, sequence_output):

@@ -502,7 +502,7 @@ class BertOnlyMLMHead(nn.Module):
 class BertOnlyNSPHead(nn.Module):
     def __init__(self, config):
-        super(BertOnlyNSPHead, self).__init__()
+        super().__init__()
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

     def forward(self, pooled_output):

@@ -512,7 +512,7 @@ class BertOnlyNSPHead(nn.Module):
 class BertPreTrainingHeads(nn.Module):
     def __init__(self, config):
-        super(BertPreTrainingHeads, self).__init__()
+        super().__init__()
         self.predictions = BertLMPredictionHead(config)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

@@ -657,7 +657,7 @@ class BertModel(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertModel, self).__init__(config)
+        super().__init__(config)
         self.config = config
         self.embeddings = BertEmbeddings(config)

@@ -864,7 +864,7 @@ class BertForPreTraining(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForPreTraining, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertPreTrainingHeads(config)

@@ -954,7 +954,7 @@ class BertForMaskedLM(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)

@@ -1053,7 +1053,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForNextSentencePrediction, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.cls = BertOnlyNSPHead(config)

@@ -1132,7 +1132,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)

@@ -1221,7 +1221,7 @@ class BertForMultipleChoice(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForMultipleChoice, self).__init__(config)
+        super().__init__(config)
         self.bert = BertModel(config)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)

@@ -1308,7 +1308,7 @@ class BertForTokenClassification(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)

@@ -1406,7 +1406,7 @@ class BertForQuestionAnswering(BertPreTrainedModel):
     """

     def __init__(self, config):
-        super(BertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.bert = BertModel(config)
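One caveat of the zero-argument form, relevant to a sweeping rewrite like this one: it works only because the compiler creates an implicit `__class__` cell for methods defined inside a `class` body. A function defined outside a class and attached afterwards cannot use it. An illustrative sketch of the failure mode:

    class A:
        def greet(self):
            return "hi"

    def greet_loudly(self):
        # defined at module level: there is no __class__ cell, so calling
        # this raises "RuntimeError: super(): __class__ cell not found"
        return super().greet().upper()

    class B(A):
        pass

    B.greet = greet_loudly
    # B().greet() fails; the explicit super(B, self).greet() would work

None of the methods touched by this commit are defined that way, which is why the rewrite can be applied uniformly.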
src/transformers/modeling_ctrl.py

@@ -81,7 +81,7 @@ def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, head_mask=N
 class MultiHeadAttention(torch.nn.Module):
     def __init__(self, d_model_size, num_heads, output_attentions=False):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.output_attentions = output_attentions
         self.num_heads = num_heads
         self.d_model_size = d_model_size

@@ -132,7 +132,7 @@ def point_wise_feed_forward_network(d_model_size, dff):
 class EncoderLayer(torch.nn.Module):
     def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_attentions=False):
-        super(EncoderLayer, self).__init__()
+        super().__init__()
         self.multi_head_attention = MultiHeadAttention(d_model_size, num_heads, output_attentions)
         self.ffn = point_wise_feed_forward_network(d_model_size, dff)

@@ -274,7 +274,7 @@ class CTRLModel(CTRLPreTrainedModel):
     """

     def __init__(self, config):
-        super(CTRLModel, self).__init__(config)
+        super().__init__(config)
         self.output_hidden_states = config.output_hidden_states
         self.output_attentions = config.output_attentions
         self.output_past = config.output_past

@@ -481,7 +481,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
     """

     def __init__(self, config):
-        super(CTRLLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.transformer = CTRLModel(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=True)
src/transformers/modeling_distilbert.py

@@ -59,7 +59,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
 class Embeddings(nn.Module):
     def __init__(self, config):
-        super(Embeddings, self).__init__()
+        super().__init__()
         self.word_embeddings = nn.Embedding(config.vocab_size, config.dim, padding_idx=0)
         self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.dim)
         if config.sinusoidal_pos_embds:

@@ -97,7 +97,7 @@ class Embeddings(nn.Module):
 class MultiHeadSelfAttention(nn.Module):
     def __init__(self, config):
-        super(MultiHeadSelfAttention, self).__init__()
+        super().__init__()
         self.n_heads = config.n_heads
         self.dim = config.dim

@@ -195,7 +195,7 @@ class MultiHeadSelfAttention(nn.Module):
 class FFN(nn.Module):
     def __init__(self, config):
-        super(FFN, self).__init__()
+        super().__init__()
         self.dropout = nn.Dropout(p=config.dropout)
         self.lin1 = nn.Linear(in_features=config.dim, out_features=config.hidden_dim)
         self.lin2 = nn.Linear(in_features=config.hidden_dim, out_features=config.dim)

@@ -214,7 +214,7 @@ class FFN(nn.Module):
 class TransformerBlock(nn.Module):
     def __init__(self, config):
-        super(TransformerBlock, self).__init__()
+        super().__init__()
         self.n_heads = config.n_heads
         self.dim = config.dim

@@ -266,7 +266,7 @@ class TransformerBlock(nn.Module):
 class Transformer(nn.Module):
     def __init__(self, config):
-        super(Transformer, self).__init__()
+        super().__init__()
         self.n_layers = config.n_layers
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -424,7 +424,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertModel, self).__init__(config)
+        super().__init__(config)
         self.embeddings = Embeddings(config)  # Embeddings
         self.transformer = Transformer(config)  # Encoder

@@ -525,7 +525,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForMaskedLM, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

@@ -600,7 +600,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.distilbert = DistilBertModel(config)

@@ -679,7 +679,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)

@@ -766,7 +766,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
     """

     def __init__(self, config):
-        super(DistilBertForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.distilbert = DistilBertModel(config)
src/transformers/modeling_encoder_decoder.py

@@ -37,7 +37,7 @@ class PreTrainedEncoderDecoder(nn.Module):
     """

     def __init__(self, encoder, decoder):
-        super(PreTrainedEncoderDecoder, self).__init__()
+        super().__init__()
         self.encoder = encoder
         self.decoder = decoder

@@ -290,7 +290,7 @@ class Model2Model(PreTrainedEncoderDecoder):
     """

     def __init__(self, *args, **kwargs):
-        super(Model2Model, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.tie_weights()

     def tie_weights(self):

@@ -321,7 +321,7 @@ class Model2Model(PreTrainedEncoderDecoder):
         ):
             raise ValueError("Only the Bert model is currently supported.")
-        model = super(Model2Model, cls).from_pretrained(
+        model = super().from_pretrained(
             encoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
             decoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
             *args,

@@ -345,5 +345,5 @@ class Model2LSTM(PreTrainedEncoderDecoder):
                 " E.g. `decoder_config={'input_size': 768, 'hidden_size': 768, 'num_layers': 2}`"
             )
         kwargs["decoder_model"] = torch.nn.LSTM(kwargs.pop("decoder_config"))
-        model = super(Model2LSTM, cls).from_pretrained(*args, **kwargs)
+        model = super().from_pretrained(*args, **kwargs)
         return model
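The last two hunks differ from the rest: `from_pretrained` is a classmethod, so the old calls passed `cls` rather than `self` (`super(Model2Model, cls)`). The zero-argument form covers this case too, binding to `cls` automatically. A minimal sketch (illustrative classes with a made-up `from_pretrained`, not the library's implementation):

    class Base:
        @classmethod
        def from_pretrained(cls, name, **kwargs):
            obj = cls.__new__(cls)   # construct without running __init__
            obj.name = name
            return obj

    class Derived(Base):
        @classmethod
        def from_pretrained(cls, name, **kwargs):
            # zero-argument super() picks up cls from the classmethod
            return super().from_pretrained(name, **kwargs)

    m = Derived.from_pretrained("bert-base-uncased")
    assert isinstance(m, Derived) and m.name == "bert-base-uncased"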