Commit 4cb48945 authored by thomwolf
Browse files

added doc for openai GPT

parent 62b8eb43
...@@ -154,6 +154,7 @@ class BertConfig(PretrainedConfig): ...@@ -154,6 +154,7 @@ class BertConfig(PretrainedConfig):
:class:`~pytorch_transformers.BertConfig` is the configuration class to store the configuration of a :class:`~pytorch_transformers.BertConfig` is the configuration class to store the configuration of a
`BertModel`. `BertModel`.
Arguments: Arguments:
vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`.
hidden_size: Size of the encoder layers and the pooler layer. hidden_size: Size of the encoder layers and the pooler layer.
...@@ -193,31 +194,6 @@ class BertConfig(PretrainedConfig): ...@@ -193,31 +194,6 @@ class BertConfig(PretrainedConfig):
initializer_range=0.02, initializer_range=0.02,
layer_norm_eps=1e-12, layer_norm_eps=1e-12,
**kwargs): **kwargs):
"""Constructs BertConfig.
Arguments:
vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`.
hidden_size: Size of the encoder layers and the pooler layer.
num_hidden_layers: Number of hidden layers in the Transformer encoder.
num_attention_heads: Number of attention heads for each attention layer in
the Transformer encoder.
intermediate_size: The size of the "intermediate" (i.e., feed-forward)
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.
max_position_embeddings: The maximum sequence length that this model might
ever be used with. Typically set this to something large just in case
(e.g., 512 or 1024 or 2048).
type_vocab_size: The vocabulary size of the `token_type_ids` passed into
`BertModel`.
initializer_range: The stddev of the truncated_normal_initializer for
initializing all weight matrices.
layer_norm_eps: The epsilon used by LayerNorm.
"""
super(BertConfig, self).__init__(**kwargs) super(BertConfig, self).__init__(**kwargs)
if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
and isinstance(vocab_size_or_config_json_file, unicode)): and isinstance(vocab_size_or_config_json_file, unicode)):
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment