chenpangpang / transformers

Commit 60c984da, authored Aug 27, 2019 by VictorSanh

fix bugs

Parent 42968138
Showing 2 changed files with 79 additions and 77 deletions:

  pytorch_transformers/__init__.py           +2   -1
  pytorch_transformers/modeling_dilbert.py   +77  -76
pytorch_transformers/__init__.py

@@ -40,7 +40,8 @@ from .modeling_xlm import (XLMConfig, XLMPreTrainedModel, XLMModel,
                            XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
                                ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_dilbert import (DilBertconfig, DilBertForMaskedLM, DilBertModel, DilBertForSequenceClassification,
+from .modeling_dilbert import (DilBertConfig, DilBertForMaskedLM, DilBertModel, DilBertForSequenceClassification,
+                               DilBertForQuestionAnswering,
                                DILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
                              PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
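After this change the package root exports the correctly capitalized DilBertConfig and also exposes DilBertForQuestionAnswering. A minimal smoke-test sketch of the corrected import surface, assuming the package is installed from this branch and that the config constructor has defaults for all of its arguments:

    from pytorch_transformers import (DilBertConfig, DilBertModel, DilBertForMaskedLM,
                                      DilBertForSequenceClassification, DilBertForQuestionAnswering)

    # Build a model from a fresh configuration; no pretrained weights are needed,
    # so only the symbols touched by this commit are exercised.
    config = DilBertConfig()                 # assumes default constructor arguments
    model = DilBertForQuestionAnswering(config)
    print(type(model).__name__)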
pytorch_transformers/modeling_dilbert.py

@@ -45,7 +45,7 @@ DILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 }

-class DilBertconfig(PretrainedConfig):
+class DilBertConfig(PretrainedConfig):
     pretrained_config_archive_map = DILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
@@ -62,7 +62,7 @@ class DilBertconfig(PretrainedConfig):
                  initializer_range=0.02,
                  tie_weights=True,
                  **kwargs):
-        super(DilBertconfig, self).__init__(**kwargs)
+        super(DilBertConfig, self).__init__(**kwargs)

         if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
@@ -77,6 +77,7 @@ class DilBertconfig(PretrainedConfig):
         self.n_layers = n_layers
         self.n_heads = n_heads
         self.dim = dim
         self.hidden_dim = hidden_dim
         self.dropout = dropout
         self.attention_dropout = attention_dropout
         self.activation = activation
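The vocab_size_or_config_json_file check in the previous hunk follows the usual pytorch_transformers configuration pattern: a string first argument is treated as a path to a JSON config file, anything else as the vocabulary size, with the remaining arguments stored as attributes, as the assignments in this hunk show. A hedged, self-contained sketch of that dispatch with purely illustrative names (this is not the class from the diff):

    import json
    import sys

    class ConfigSketch(object):
        """Illustrative only: mirrors the str-vs-int dispatch used by the config classes."""

        def __init__(self, vocab_size_or_config_json_file=30522, n_layers=6, dim=768):
            if isinstance(vocab_size_or_config_json_file, str) or (
                    sys.version_info[0] == 2 and isinstance(vocab_size_or_config_json_file, unicode)):
                # A string is treated as a path to a JSON file whose keys become attributes.
                with open(vocab_size_or_config_json_file, "r") as reader:
                    for key, value in json.loads(reader.read()).items():
                        setattr(self, key, value)
            else:
                # Otherwise the first argument is the vocabulary size itself.
                self.vocab_size = vocab_size_or_config_json_file
                self.n_layers = n_layers
                self.dim = dim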
@@ -341,7 +342,7 @@ class DilBertPreTrainedModel(PreTrainedModel):
     """ An abstract class to handle weights initialization and
         a simple interface for downloading and loading pretrained models.
     """
-    config_class = DilBertconfig
+    config_class = DilBertConfig
     pretrained_model_archive_map = DILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
     load_tf_weights = None
     base_model_prefix = "dilbert"
@@ -370,7 +371,7 @@ DILBERT_START_DOCSTRING = r"""
     For more information on DilBERT, you should check TODO(Victor): Link to Medium

     Parameters:
-        config (:class:`~pytorch_transformers.DilBertconfig`): Model configuration class with all the parameters of the model.
+        config (:class:`~pytorch_transformers.DilBertConfig`): Model configuration class with all the parameters of the model.
             Initializing with a config file does not load the weights associated with the model, only the configuration.
             Check out the :meth:`~pytorch_transformers.PreTrainedModel.from_pretrained` method to load the model weights.
 """
@@ -391,18 +392,7 @@ DILBERT_INPUTS_DOCSTRING = r"""
 @add_start_docstrings("The bare DilBERT encoder/transformer outputing raw hidden-states without any specific head on top.",
                       DILBERT_START_DOCSTRING, DILBERT_INPUTS_DOCSTRING)
 class DilBertModel(DilBertPreTrainedModel):
-    def __init__(self, config):
-        super(DilBertModel, self).__init__(config)
-
-        self.embeddings = Embeddings(config)   # Embeddings
-        self.transformer = Transformer(config) # Encoder
-
-        self.apply(self.init_weights)
-
-    def forward(self,
-                input_ids: torch.tensor,
-                attention_mask: torch.tensor = None):
-        """
+    r"""
     Parameters
     ----------
     input_ids: torch.tensor(bs, seq_length)
@@ -423,6 +413,17 @@ class DilBertModel(DilBertPreTrainedModel):
         Tuple of length n_layers with the attention weights from each layer
         Optional: only if output_attentions=True
     """
+    def __init__(self, config):
+        super(DilBertModel, self).__init__(config)
+
+        self.embeddings = Embeddings(config)   # Embeddings
+        self.transformer = Transformer(config) # Encoder
+
+        self.apply(self.init_weights)
+
+    def forward(self,
+                input_ids: torch.tensor,
+                attention_mask: torch.tensor = None):
         if attention_mask is None:
             attention_mask = torch.ones_like(input_ids) # (bs, seq_length)
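The forward pass above fills in an all-ones mask when attention_mask is None, i.e. no position is treated as padding. A hedged usage sketch of the two equivalent call styles (token ids are arbitrary, the config construction assumes default arguments, and the package is assumed to be installed from this branch):

    import torch
    from pytorch_transformers import DilBertConfig, DilBertModel

    config = DilBertConfig()                     # assumes default constructor arguments
    model = DilBertModel(config)
    model.eval()                                 # disable dropout so the two calls match

    input_ids = torch.randint(0, 1000, (2, 16))  # (bs, seq_length), arbitrary ids

    hidden_default = model(input_ids)[0]                               # mask filled in as ones
    hidden_explicit = model(input_ids, torch.ones_like(input_ids))[0]  # same thing, spelled out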
@@ -438,6 +439,30 @@ class DilBertModel(DilBertPreTrainedModel):
 @add_start_docstrings("""DilBert Model with a `masked language modeling` head on top. """,
                       DILBERT_START_DOCSTRING, DILBERT_INPUTS_DOCSTRING)
 class DilBertForMaskedLM(DilBertPreTrainedModel):
+    r"""
+    Parameters
+    ----------
+    input_ids: torch.tensor(bs, seq_length)
+        Token ids.
+    attention_mask: torch.tensor(bs, seq_length)
+        Attention mask. Optional: If None, it's like there was no padding.
+    masked_lm_labels: torch.tensor(bs, seq_length)
+        The masked language modeling labels. Optional: If None, no loss is computed.
+
+    Outputs
+    -------
+    mlm_loss: torch.tensor(1,)
+        Masked Language Modeling loss to optimize.
+        Optional: only if `masked_lm_labels` is not None
+    prediction_logits: torch.tensor(bs, seq_length, voc_size)
+        Token prediction logits
+    all_hidden_states: Tuple[torch.tensor(bs, seq_length, dim)]
+        Tuple of length n_layers with the hidden states from each layer.
+        Optional: only if `output_hidden_states`=True
+    all_attentions: Tuple[torch.tensor(bs, n_heads, seq_length, seq_length)]
+        Tuple of length n_layers with the attention weights from each layer
+        Optional: only if `output_attentions`=True
+    """
     def __init__(self, config):
         super(DilBertForMaskedLM, self).__init__(config)
         self.output_attentions = config.output_attentions
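Per the docstring that now lives at class level, the loss comes first in the returned tuple whenever masked_lm_labels is supplied, and positions labelled -1 are skipped by the loss (the head is built with CrossEntropyLoss(ignore_index=-1) in the next hunk). A hedged usage sketch consistent with that contract, with made-up tensors and a default-constructed config:

    import torch
    from pytorch_transformers import DilBertConfig, DilBertForMaskedLM

    config = DilBertConfig()                     # assumes default constructor arguments
    mlm_model = DilBertForMaskedLM(config)

    input_ids = torch.randint(0, 1000, (2, 16))  # (bs, seq_length)
    labels = input_ids.clone()
    labels[:, :8] = -1                           # -1 positions are ignored by the MLM loss

    outputs = mlm_model(input_ids, masked_lm_labels=labels)
    mlm_loss, prediction_logits = outputs[0], outputs[1]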
@@ -449,7 +474,7 @@ class DilBertForMaskedLM(DilBertPreTrainedModel):
         self.vocab_projector = nn.Linear(config.dim, config.vocab_size)

         self.apply(self.init_weights)
-        self.tie_weights()
+        self.tie_weights_()

         self.mlm_loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
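tie_weights_() presumably ties the vocab_projector above to the input embedding matrix, the standard trick for BERT-style masked LM heads; the trailing underscore plausibly avoids a clash with the tie_weights flag visible in the config signature earlier in this diff. A hedged, self-contained sketch of what such a helper typically does, with illustrative names only:

    import torch.nn as nn

    class TiedLMHeadSketch(nn.Module):
        """Illustrative only: shares one weight matrix between the input embedding
        and the output vocabulary projection, so updating one updates the other."""

        def __init__(self, vocab_size=1000, dim=64):
            super(TiedLMHeadSketch, self).__init__()
            self.embeddings = nn.Embedding(vocab_size, dim)      # weight: (vocab_size, dim)
            self.vocab_projector = nn.Linear(dim, vocab_size)    # weight: (vocab_size, dim)
            self.tie_weights_()

        def tie_weights_(self):
            # Same shape on both sides, so the projector can simply reuse the embedding weight.
            self.vocab_projector.weight = self.embeddings.weight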
@@ -464,30 +489,6 @@ class DilBertForMaskedLM(DilBertPreTrainedModel):
                 input_ids: torch.tensor,
                 attention_mask: torch.tensor = None,
                 masked_lm_labels: torch.tensor = None):
-        """
-        Parameters
-        ----------
-        input_ids: torch.tensor(bs, seq_length)
-            Token ids.
-        attention_mask: torch.tensor(bs, seq_length)
-            Attention mask. Optional: If None, it's like there was no padding.
-        masked_lm_labels: torch.tensor(bs, seq_length)
-            The masked language modeling labels. Optional: If None, no loss is computed.
-
-        Outputs
-        -------
-        mlm_loss: torch.tensor(1,)
-            Masked Language Modeling loss to optimize.
-            Optional: only if `masked_lm_labels` is not None
-        prediction_logits: torch.tensor(bs, seq_length, voc_size)
-            Token prediction logits
-        all_hidden_states: Tuple[torch.tensor(bs, seq_length, dim)]
-            Tuple of length n_layers with the hidden states from each layer.
-            Optional: only if `output_hidden_states`=True
-        all_attentions: Tuple[torch.tensor(bs, n_heads, seq_length, seq_length)]
-            Tuple of length n_layers with the attention weights from each layer
-            Optional: only if `output_attentions`=True
-        """
         tfmr_output = self.encoder(input_ids=input_ids,
                                    attention_mask=attention_mask)
         hidden_states = tfmr_output[0]   # (bs, seq_length, dim)
@@ -508,22 +509,7 @@ class DilBertForMaskedLM(DilBertPreTrainedModel):
                          the pooled output) e.g. for GLUE tasks. """,
                       DILBERT_START_DOCSTRING, DILBERT_INPUTS_DOCSTRING)
 class DilBertForSequenceClassification(DilBertPreTrainedModel):
-    def __init__(self, config):
-        super(DilBertForSequenceClassification, self).__init__(config)
-        self.num_labels = config.num_labels
-
-        self.dilbert = DilBertModel(config)
-        self.pre_classifier = nn.Linear(config.dim, config.dim)
-        self.classifier = nn.Linear(config.dim, config.num_labels)
-        self.dropout = nn.Dropout(config.seq_classif_dropout)
-
-        self.apply(self.init_weights)
-
-    def forward(self,
-                input_ids: torch.tensor,
-                attention_mask: torch.tensor = None,
-                labels: torch.tensor = None):
-        """
+    r"""
     Parameters
     ----------
     input_ids: torch.tensor(bs, seq_length)
@@ -547,6 +533,21 @@ class DilBertForSequenceClassification(DilBertPreTrainedModel):
         Tuple of length n_layers with the attention weights from each layer
         Optional: only if `output_attentions`=True
     """
+    def __init__(self, config):
+        super(DilBertForSequenceClassification, self).__init__(config)
+        self.num_labels = config.num_labels
+
+        self.dilbert = DilBertModel(config)
+        self.pre_classifier = nn.Linear(config.dim, config.dim)
+        self.classifier = nn.Linear(config.dim, config.num_labels)
+        self.dropout = nn.Dropout(config.seq_classif_dropout)
+
+        self.apply(self.init_weights)
+
+    def forward(self,
+                input_ids: torch.tensor,
+                attention_mask: torch.tensor = None,
+                labels: torch.tensor = None):
         dilbert_output = self.dilbert(input_ids=input_ids,
                                       attention_mask=attention_mask)
         pooled_output = dilbert_output[1]    # (bs, dim)
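The head wired up in __init__ maps the pooled output, dilbert_output[1] of shape (bs, dim), through pre_classifier, dropout, and classifier. The rest of the forward pass is outside this hunk, so the sketch below only illustrates how such a two-layer head is typically applied; the activation choice and the exact ordering are assumptions, not read from the file:

    import torch
    import torch.nn as nn

    bs, dim, num_labels = 2, 64, 3
    pre_classifier = nn.Linear(dim, dim)
    classifier = nn.Linear(dim, num_labels)
    dropout = nn.Dropout(0.2)                       # stand-in for config.seq_classif_dropout

    pooled_output = torch.randn(bs, dim)            # stand-in for dilbert_output[1]
    x = torch.relu(pre_classifier(pooled_output))   # activation choice is an assumption
    logits = classifier(dropout(x))                 # (bs, num_labels)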
@@ -571,22 +572,7 @@ class DilBertForSequenceClassification(DilBertPreTrainedModel):
                          the hidden-states output to compute `span start logits` and `span end logits`). """,
                       DILBERT_START_DOCSTRING, DILBERT_INPUTS_DOCSTRING)
 class DilBertForQuestionAnswering(DilBertPreTrainedModel):
-    def __init__(self, config):
-        super(DilBertForQuestionAnswering, self).__init__(config)
-
-        self.dilbert = DilBertModel(config)
-        self.qa_outputs = nn.Linear(config.dim, config.num_labels)
-        assert config.num_labels == 2
-        self.dropout = nn.Dropout(config.qa_dropout)
-
-        self.apply(self.init_weights)
-
-    def forward(self,
-                input_ids: torch.tensor,
-                attention_mask: torch.tensor = None,
-                start_positions: torch.tensor = None,
-                end_positions: torch.tensor = None):
-        """
+    r"""
     Parameters
     ----------
     input_ids: torch.tensor(bs, seq_length)
@@ -620,6 +606,21 @@ class DilBertForQuestionAnswering(DilBertPreTrainedModel):
         Tuple of length n_layers with the attention weights from each layer
         Optional: only if `output_attentions`=True
     """
+    def __init__(self, config):
+        super(DilBertForQuestionAnswering, self).__init__(config)
+
+        self.dilbert = DilBertModel(config)
+        self.qa_outputs = nn.Linear(config.dim, config.num_labels)
+        assert config.num_labels == 2
+        self.dropout = nn.Dropout(config.qa_dropout)
+
+        self.apply(self.init_weights)
+
+    def forward(self,
+                input_ids: torch.tensor,
+                attention_mask: torch.tensor = None,
+                start_positions: torch.tensor = None,
+                end_positions: torch.tensor = None):
         dilbert_output = self.dilbert(input_ids=input_ids,
                                       attention_mask=attention_mask)
         hidden_states = dilbert_output[0]   # (bs, max_query_len, dim)
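With num_labels asserted to be 2, qa_outputs turns each hidden state into one start logit and one end logit. The remainder of the forward pass is outside this hunk, so the following is only a hedged sketch of the usual way such logits are split, with illustrative shapes and names:

    import torch
    import torch.nn as nn

    bs, seq_length, dim = 2, 16, 64
    qa_outputs = nn.Linear(dim, 2)                      # stand-in for self.qa_outputs

    hidden_states = torch.randn(bs, seq_length, dim)    # stand-in for dilbert_output[0]
    logits = qa_outputs(hidden_states)                  # (bs, seq_length, 2)
    start_logits, end_logits = logits.split(1, dim=-1)  # each (bs, seq_length, 1)
    start_logits = start_logits.squeeze(-1)             # (bs, seq_length)
    end_logits = end_logits.squeeze(-1)                 # (bs, seq_length)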