Wrap up configurations

a1cb1004 · Lysandre · Lysandre Debut · c11b6fd3 · a1cb1004 · a1cb1004
Commit a1cb1004 authored Jan 13, 2020 by Lysandre Committed by Lysandre Debut Jan 14, 2020
4 changed files
--- a/src/transformers/configuration_gpt2.py
+++ b/src/transformers/configuration_gpt2.py
@@ -69,6 +69,31 @@ class GPT2Config(PretrainedConfig):
                The epsilon to use in the layer normalization layers
            initializer_range (:obj:`float`, optional, defaults to 0.02):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+            summary_type (:obj:`string`, optional, defaults to "cls_index"):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.GPT2DoubleHeadsModel`.
+                Is one of the following options:
+                    - 'last' => take the last token hidden state (like XLNet)
+                    - 'first' => take the first token hidden state (like Bert)
+                    - 'mean' => take the mean of all tokens hidden states
+                    - 'cls_index' => supply a Tensor of classification token position (GPT/GPT-2)
+                    - 'attn' => Not implemented now, use multi-head attention
+            summary_use_proj (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.GPT2DoubleHeadsModel`.
+                Add a projection after the vector extraction
+            summary_activation (:obj:`string` or :obj:`None`, optional, defaults to :obj:`None`):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.GPT2DoubleHeadsModel`.
+                'tanh' => add a tanh activation to the output, Other => no activation.
+            summary_proj_to_labels (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.GPT2DoubleHeadsModel`.
+                If True, the projection outputs to config.num_labels classes (otherwise to hidden_size).
+            summary_first_dropout (:obj:`float`, optional, defaults to 0.1):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.GPT2DoubleHeadsModel`.
+                Add a dropout before the projection and activation
    """
    pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP

--- a/src/transformers/configuration_openai.py
+++ b/src/transformers/configuration_openai.py
@@ -69,6 +69,31 @@ class OpenAIGPTConfig(PretrainedConfig):
                The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
            predict_special_tokens (:obj:`boolean`, optional, defaults to :obj:`True`):
                Whether special tokens should be predicted when the model has a language modeling head.
+            summary_type (:obj:`string`, optional, defaults to "cls_index"):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.OpenAIGPTDoubleHeadsModel`.
+                Is one of the following options:
+                    - 'last' => take the last token hidden state (like XLNet)
+                    - 'first' => take the first token hidden state (like Bert)
+                    - 'mean' => take the mean of all tokens hidden states
+                    - 'cls_index' => supply a Tensor of classification token position (GPT/GPT-2)
+                    - 'attn' => Not implemented now, use multi-head attention
+            summary_use_proj (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.OpenAIGPTDoubleHeadsModel`.
+                Add a projection after the vector extraction
+            summary_activation (:obj:`string` or :obj:`None`, optional, defaults to :obj:`None`):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.OpenAIGPTDoubleHeadsModel`.
+                'tanh' => add a tanh activation to the output, Other => no activation.
+            summary_proj_to_labels (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.OpenAIGPTDoubleHeadsModel`.
+                If True, the projection outputs to config.num_labels classes (otherwise to hidden_size).
+            summary_first_dropout (:obj:`float`, optional, defaults to 0.1):
+                Argument used when doing sequence summary. Used for the multiple choice head in
+                :class:`~transformers.OpenAIGPTDoubleHeadsModel`.
+                Add a dropout before the projection and activation
    """
    pretrained_config_archive_map = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP

--- a/src/transformers/configuration_xlm.py
+++ b/src/transformers/configuration_xlm.py
@@ -103,10 +103,35 @@ class XLMConfig(PretrainedConfig):
                The index of the masking token in the vocabulary.
            is_encoder (:obj:`boolean`, optional, defaults to :obj:`True`):
                Whether the initialized model should be a transformer encoder or decoder as seen in Vaswani et al.
+            summary_type (:obj:`string`, optional, defaults to "first"):
+                Argument used when doing sequence summary. Used for the sequence classification head in
+                :class:`~transformers.XLMForSequenceClassification`.
+                Is one of the following options:
+                    - 'last' => take the last token hidden state (like XLNet)
+                    - 'first' => take the first token hidden state (like Bert)
+                    - 'mean' => take the mean of all tokens hidden states
+                    - 'cls_index' => supply a Tensor of classification token position (GPT/GPT-2)
+                    - 'attn' => Not implemented now, use multi-head attention
+            summary_use_proj (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the sequence classification head in
+                :class:`~transformers.XLMForSequenceClassification`.
+                Add a projection after the vector extraction
+            summary_activation (:obj:`string` or :obj:`None`, optional, defaults to :obj:`None`):
+                Argument used when doing sequence summary. Used for the sequence classification head in
+                :class:`~transformers.XLMForSequenceClassification`.
+                'tanh' => add a tanh activation to the output, Other => no activation.
+            summary_proj_to_labels (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the sequence classification head in
+                :class:`~transformers.XLMForSequenceClassification`.
+                If True, the projection outputs to config.num_labels classes (otherwise to hidden_size).
+            summary_first_dropout (:obj:`float`, optional, defaults to 0.1):
+                Argument used when doing sequence summary. Used for the sequence classification head in
+                :class:`~transformers.XLMForSequenceClassification`.
+                Add a dropout before the projection and activation
            start_n_top (:obj:`int`, optional, defaults to 5):
-                TODO
+                Used in the SQuAD evaluation script for XLM and XLNet.
            end_n_top (:obj:`int`, optional, defaults to 5):
-                TODO
+                Used in the SQuAD evaluation script for XLM and XLNet.
            mask_token_id (:obj:`int`, optional, defaults to 0):
                Model agnostic parameter to identify masked tokens when generating text in an MLM context.
            lang_id (:obj:`int`, optional, defaults to 1):

--- a/src/transformers/configuration_xlnet.py
+++ b/src/transformers/configuration_xlnet.py
@@ -80,10 +80,35 @@ class XLNetConfig(PretrainedConfig):
                Setting this attribute to -1 means no clamping.
            same_length (:obj:`boolean`, optional, defaults to :obj:`False`):
                Whether to use the same attention length for each token.
+            summary_type (:obj:`string`, optional, defaults to "last"):
+                Argument used when doing sequence summary. Used for the sequence classification and multiple choice heads in
+                :class:`~transformers.XLNetForSequenceClassification` and :class:`~transformers.XLNetForMultipleChoice`.
+                Is one of the following options:
+                    - 'last' => take the last token hidden state (like XLNet)
+                    - 'first' => take the first token hidden state (like Bert)
+                    - 'mean' => take the mean of all tokens hidden states
+                    - 'cls_index' => supply a Tensor of classification token position (GPT/GPT-2)
+                    - 'attn' => Not implemented now, use multi-head attention
+            summary_use_proj (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the sequence classification and multiple choice heads in
+                :class:`~transformers.XLNetForSequenceClassification` and :class:`~transformers.XLNetForMultipleChoice`.
+                Add a projection after the vector extraction
+            summary_activation (:obj:`string` or :obj:`None`, optional, defaults to :obj:`None`):
+                Argument used when doing sequence summary. Used for the sequence classification and multiple choice heads in
+                :class:`~transformers.XLNetForSequenceClassification` and :class:`~transformers.XLNetForMultipleChoice`.
+                'tanh' => add a tanh activation to the output, Other => no activation.
+            summary_proj_to_labels (:obj:`boolean`, optional, defaults to :obj:`True`):
+                Argument used when doing sequence summary. Used for the sequence classification and multiple choice heads in
+                :class:`~transformers.XLNetForSequenceClassification` and :class:`~transformers.XLNetForMultipleChoice`.
+                If True, the projection outputs to config.num_labels classes (otherwise to hidden_size).
+            summary_last_dropout (:obj:`float`, optional, defaults to 0.1):
+                Argument used when doing sequence summary. Used for the sequence classification and multiple choice heads in
+                :class:`~transformers.XLNetForSequenceClassification` and :class:`~transformers.XLNetForMultipleChoice`.
+                Add a dropout after the projection and activation
            start_n_top (:obj:`int`, optional, defaults to 5):
-                TODO
+                Used in the SQuAD evaluation script for XLM and XLNet.
            end_n_top (:obj:`int`, optional, defaults to 5):
-                TODO
+                Used in the SQuAD evaluation script for XLM and XLNet.
    """
    pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP