"""An abstract class to handle weights initialization and
"""An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
a simple interface for downloading and loading pretrained models.
"""
"""
config_class=XLNetConfig
config_class=XLNetConfig
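The "simple interface" mentioned in the docstring is the usual `from_pretrained` entry point inherited from `PreTrainedModel`. A minimal sketch of that usage (the checkpoint name here is only an illustrative example)::

    from transformers import XLNetLMHeadModel

    # Downloads the pretrained weights (or loads them from the local cache)
    # and instantiates the model with the matching XLNetConfig.
    model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")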
...
@@ -557,8 +556,7 @@ class XLNetPreTrainedModel(PreTrainedModel):
    base_model_prefix = "transformer"

    def _init_weights(self, module):
-       """ Initialize the weights.
-       """
+       """Initialize the weights."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
...
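The rest of `_init_weights` is elided in this excerpt. For context, the usual HuggingFace-style initializer continues roughly along these lines; this is a sketch of the common pattern (using the config's `initializer_range`), not the exact elided code::

    import torch.nn as nn

    def _init_weights(self, module):
        """Initialize the weights."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Normal initialization with the std configured on the model config
            # (the TF original uses truncated_normal instead).
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            # LayerNorm starts out as an identity transform.
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)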
@@ -1350,46 +1348,46 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_predict)`, `optional`, defaults to :obj:`None`):
            Labels for masked language modeling.
            `num_predict` corresponds to `target_mapping.shape[1]`. If `target_mapping` is `None`, then
            `num_predict` corresponds to `sequence_length`.
            The labels should correspond to the masked input words that should be predicted and depend on
            `target_mapping`. Note that in order to perform standard auto-regressive language modeling,
            a `<mask>` token has to be added to the `input_ids` (see the `prepare_inputs_for_generation`
            fn and the examples below).
            Indices are selected in ``[-100, 0, ..., config.vocab_size]``.
            All labels set to ``-100`` are ignored; the loss is only
            computed for labels in ``[0, ..., config.vocab_size]``.

        Return:

        Examples::

            from transformers import XLNetTokenizer, XLNetLMHeadModel
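The docstring's example is truncated in this diff. As a hedged sketch of how the `perm_mask`, `target_mapping`, and `labels` arguments described above fit together (the checkpoint name and target word are illustrative, and the target is assumed to tokenize to a single piece since `num_predict` is 1 here)::

    import torch
    from transformers import XLNetTokenizer, XLNetLMHeadModel

    tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
    model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")

    # Append a <mask> token; the model will predict the positions selected by target_mapping.
    input_ids = torch.tensor(
        tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)
    ).unsqueeze(0)  # shape (1, sequence_length)

    # perm_mask[b, i, j] == 1.0 means token i may not attend to token j;
    # here no token is allowed to see the final (masked) position.
    perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
    perm_mask[:, :, -1] = 1.0

    # target_mapping selects which positions are predicted: num_predict == target_mapping.shape[1].
    target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)
    target_mapping[0, 0, -1] = 1.0  # predict only the last token

    # labels has shape (batch_size, num_predict); any position set to -100 is ignored by the loss.
    # The target word must encode to a single piece here, because num_predict == 1.
    labels = torch.tensor(tokenizer.encode("cute", add_special_tokens=False)).unsqueeze(0)

    outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
    loss, logits = outputs[:2]  # logits: (1, num_predict, config.vocab_size)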