chenpangpang / transformers · Commit cdf0f2fe
authored Jun 01, 2019 by VictorSanh
fix typo/presentation
parent 8f97f6c5
Changes: 2 changed files with 97 additions and 97 deletions (+97 −97)
  hubconfs/gpt2_hubconf.py            +53 −53
  hubconfs/transformer_xl_hubconf.py  +44 −44
hubconfs/gpt2_hubconf.py
from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer
from pytorch_pretrained_bert.modeling_gpt2 import (
    GPT2Model,
    GPT2LMHeadModel,
    GPT2DoubleHeadsModel
)
# A lot of models share the same param doc. Use a decorator
# to save typing
gpt2_docstring = """
    Params:
        pretrained_model_name_or_path: either:
            - a str with the name of a pre-trained model to load, selected from the list:
                . `gpt2`
            - a path or url to a pretrained model archive containing:
                . `gpt2_config.json` a configuration file for the model
                . `pytorch_model.bin` a PyTorch dump of a GPT2Model instance
            - a path or url to a pretrained model archive containing:
                . `gpt2_config.json` a configuration file for the model
                . a TensorFlow checkpoint with trained weights
        from_tf: should we load the weights from a locally saved TensorFlow checkpoint
        cache_dir: an optional path to a folder in which the pre-trained models will be cached
        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
        *inputs, **kwargs: additional input for the specific GPT-2 class
"""
...
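The Params above pass straight through the hub entry points to `from_pretrained`, so a call can pin the cache location or point at a local archive. A minimal sketch (the archive path below is hypothetical):

import torch

# Download `gpt2` by name, caching the weights in an explicit directory
model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2',
                       cache_dir='./gpt2_cache')

# Or load from a local archive converted from TensorFlow
# (hypothetical path; the archive must contain `gpt2_config.json` and a TF checkpoint)
# model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model',
#                        './my_gpt2_archive', from_tf=True)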
@@ -35,27 +35,27 @@ def _append_from_pretrained_docstring(docstr):
def gpt2Tokenizer(*args, **kwargs):
"""
Instantiate a GPT-2 BPE tokenizer for OpenAI GPT-2 from a pre-trained/customized vocab file.
Peculiarities:
Peculiarities:
- Byte-level BPE
Args:
pretrained_model_name_or_path: Path to pretrained model archive
or one of pre-trained vocab configs below.
*
openai-
gpt
* gpt
2
Keyword args:
special_tokens: Special tokens in vocabulary that are not pretrained ([SEP], [CLS]...)
Default: None
max_len: An artificial maximum length to truncate tokenized sequences to;
Effective maximum length is always the minimum of this
special_tokens: Special tokens in vocabulary that are not pretrained ([SEP], [CLS]...)
Default: None
max_len: An artificial maximum length to truncate tokenized sequences to;
Effective maximum length is always the minimum of this
value (if specified) and the underlying BERT model's
sequence length.
Default: None
Default: None
Example:
>>> import torch
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
>>> text = "Who was Jim Henson ?"
>>> text = "Who was Jim Henson ?"
>>> indexed_tokens = tokenizer.encode(tokenized_text)
"""
    tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
...
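As a quick sanity check of the byte-level BPE described above, a round trip through `encode`/`decode` should reproduce the input string; a small sketch:

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
ids = tokenizer.encode("Who was Jim Henson ?")
print(ids)                    # a list of vocabulary ids
print(tokenizer.decode(ids))  # byte-level BPE round-trips back to the input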
@@ -66,31 +66,31 @@ def gpt2Tokenizer(*args, **kwargs):
def gpt2Model(*args, **kwargs):
"""
gpt2Model is the basic OpenAI GPT-2 Transformer model based on
identical stacked masked self-attention blocks and pre-trained
on large scale dataset using language modeling signal.
identical stacked masked self-attention blocks and pre-trained
on large scale dataset using language modeling signal.
Example:
# Load the tokenizer
>>> import torch
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> text_2 = "Jim Henson was a puppeteer"
>>> indexed_tokens_1 = tokenizer.encode(text_1)
>>> indexed_tokens_2 = tokenizer.encode(text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2Model
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2')
>>> model.eval()
# Predict hidden states features for each layer
# past can be used to reuse precomputed hidden state in a subsequent predictions
# past can be used to reuse precomputed hidden state in a subsequent predictions
>>> with torch.no_grad():
hidden_states_1, past = model(tokens_tensor_1)
hidden_states_2, past = model(tokens_tensor_2, past=past)
hidden_states_2, past = model(tokens_tensor_2, past=past)
"""
    model = GPT2Model.from_pretrained(*args, **kwargs)
    return model
...
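To make the role of `past` concrete, here is a hedged sketch: the first call returns one cached key/value entry per layer, and a second call that passes them back only has to encode the new tokens (shapes assume the 12-layer, 768-dim `gpt2` checkpoint):

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2')
model.eval()

tokens = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
with torch.no_grad():
    hidden_states, past = model(tokens)

print(hidden_states.shape)  # torch.Size([1, seq_len, 768]) for `gpt2`
print(len(past))            # one cached key/value tensor per layer (12 for `gpt2`)

# Follow-up call: only the new token is encoded, attention reads from `past`
with torch.no_grad():
    next_hidden, past = model(torch.tensor([tokenizer.encode(" Jim")]), past=past)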
@@ -100,34 +100,34 @@ def gpt2Model(*args, **kwargs):
def gpt2LMHeadModel(*args, **kwargs):
"""
gpt2LMHeadModel is the OpenAI GPT-2 Transformer model with the
tied (pre-trained) language modeling head on top.
tied (pre-trained) language modeling head on top.
Example:
Example:
# Load the tokenizer
>>> import torch
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> text_2 = "Jim Henson was a puppeteer"
>>> indexed_tokens_1 = tokenizer.encode(text_1)
>>> indexed_tokens_2 = tokenizer.encode(text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2LMHeadModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2LMHeadModel', 'gpt2')
>>> model.eval()
# Predict hidden states features for each layer
# past can be used to reuse precomputed hidden state in a subsequent predictions
# past can be used to reuse precomputed hidden state in a subsequent predictions
>>> with torch.no_grad():
predictions_1, past = model(tokens_tensor_1)
predictions_2, past = model(tokens_tensor_2, past=past)
predictions_1, past = model(tokens_tensor_1)
predictions_2, past = model(tokens_tensor_2, past=past)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.decode([predicted_index])
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.decode([predicted_index])
>>> assert predicted_token == ' who'
"""
    model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
...
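The `past` mechanism above extends naturally to greedy generation: feed the prompt once, then loop one token at a time. A sketch (the continuation text will vary with the checkpoint):

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2LMHeadModel', 'gpt2')
model.eval()

generated = tokenizer.encode("Who was Jim Henson ? Jim Henson was a")
tokens, past = torch.tensor([generated]), None
with torch.no_grad():
    for _ in range(10):
        predictions, past = model(tokens, past=past)
        next_id = torch.argmax(predictions[0, -1, :]).item()
        generated.append(next_id)
        tokens = torch.tensor([[next_id]])  # only the new token; `past` covers the rest
print(tokenizer.decode(generated))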
@@ -138,19 +138,19 @@ def gpt2LMHeadModel(*args, **kwargs):
def gpt2DoubleHeadsModel(*args, **kwargs):
"""
gpt2DoubleHeadsModel is the OpenAI GPT-2 Transformer model with the
tied (pre-trained) language modeling head and a multiple choice
classification head (only initialized, not pre-trained).
tied (pre-trained) language modeling head and a multiple choice
classification head (only initialized, not pre-trained).
Example:
Example:
# Load the tokenizer
>>> import torch
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text = "Who was Jim Henson ?"
>>> text = "Who was Jim Henson ?"
>>> indexed_tokens = tokenizer.encode(text)
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> mc_token_ids = torch.LongTensor([ [len(indexed_tokens)] ])
>>> mc_token_ids = torch.LongTensor([ [len(indexed_tokens)] ])
# Load gpt2DoubleHeadsModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2')
...
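Since the example above is cut off, here is a hedged sketch of a full double-heads forward pass, assuming the library's multiple-choice convention of `input_ids` shaped [batch, num_choices, seq_len] and `mc_token_ids` shaped [batch, num_choices] (the index of the token whose hidden state feeds the classification head):

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2')
model.eval()

choices = ["Jim Henson was a puppeteer", "Jim Henson was a mysterious young man"]
encoded = [tokenizer.encode(c) for c in choices]
max_len = max(len(e) for e in encoded)
input_ids = torch.tensor([[e + [0] * (max_len - len(e)) for e in encoded]])  # 0 as an arbitrary pad id
mc_token_ids = torch.tensor([[len(e) - 1 for e in encoded]])  # last real token of each choice

with torch.no_grad():
    lm_logits, mc_logits, presents = model(input_ids, mc_token_ids)
print(mc_logits)  # one score per choice; the classification head is untrained here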
hubconfs/transformer_xl_hubconf.py
from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer
from pytorch_pretrained_bert.modeling_transfo_xl import (
    TransfoXLModel,
    TransfoXLLMHeadModel
)
# A lot of models share the same param doc. Use a decorator
...
@@ -11,20 +11,20 @@ transformer_xl_docstring = """
    - you don't need to specify positional embedding indices
    - the tokens in the vocabulary have to be sorted in order of decreasing frequency.

    Params:
        pretrained_model_name_or_path: either:
            - a str with the name of a pre-trained model to load, selected from the list:
                . `transfo-xl-wt103`
            - a path or url to a pretrained model archive containing:
                . `transfo_xl_config.json` a configuration file for the model
                . `pytorch_model.bin` a PyTorch dump of a TransfoXLModel instance
            - a path or url to a pretrained model archive containing:
                . `transfo_xl_config.json` a configuration file for the model
                . `model.chkpt` a TensorFlow checkpoint
        from_tf: should we load the weights from a locally saved TensorFlow checkpoint
        cache_dir: an optional path to a folder in which the pre-trained models will be cached
        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
        *inputs, **kwargs: additional input for the specific TransformerXL class
"""
...
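A hedged sketch of the `state_dict` hook documented above: build the architecture by name but substitute your own weights (the checkpoint filename is hypothetical):

import torch

model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLModel',
                       'transfo-xl-wt103')
# ... fine-tune, then save (hypothetical filename)
torch.save(model.state_dict(), 'transfo_xl_finetuned.bin')

# Later: same architecture, your weights instead of the pre-trained ones
state_dict = torch.load('transfo_xl_finetuned.bin')
model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLModel',
                       'transfo-xl-wt103', state_dict=state_dict)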
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
            * transfo-xl-wt103

    Example:
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')
        >>> text = "Who was Jim Henson ?"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    """
    tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
    return tokenizer
...
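Unlike the byte-level GPT-2 tokenizer, this one is word-level, so a round trip goes through explicit token strings; a small sketch:

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT',
                           'transformerXLTokenizer', 'transfo-xl-wt103')
tokens = tokenizer.tokenize("Who was Jim Henson ?")
ids = tokenizer.convert_tokens_to_ids(tokens)
print(tokenizer.convert_ids_to_tokens(ids))  # back to the original token strings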
@@ -60,33 +60,33 @@ def transformerXLTokenizer(*args, **kwargs):
def transformerXLModel(*args, **kwargs):
"""
gpt2Model is the basic OpenAI GPT-2 Transformer model based on
identical stacked masked self-attention blocks and pre-trained
on large scale dataset using language modeling signal.
identical stacked masked self-attention blocks and pre-trained
on large scale dataset using language modeling signal.
Example:
# Load the tokenizer
>>> import torch
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
>>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLModel', 'transfo-xl-wt103')
>>> model.eval()
# Predict hidden states features for each layer
# We can re-use the memory cells in a subsequent call to attend a longer context
# We can re-use the memory cells in a subsequent call to attend a longer context
>>> with torch.no_grad():
hidden_states_1, mems_1 = model(tokens_tensor_1)
hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
"""
    model = TransfoXLModel.from_pretrained(*args, **kwargs)
    return model
...
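The `mems` recurrence above generalizes to streaming a long text one segment at a time; a sketch in which each segment attends to the memory of all earlier ones:

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT',
                           'transformerXLTokenizer', 'transfo-xl-wt103')
model = torch.hub.load('huggingface/pytorch-pretrained-BERT',
                       'transformerXLModel', 'transfo-xl-wt103')
model.eval()

segments = ["Who was Jim Henson ?", "Jim Henson was a puppeteer"]
mems = None  # no memory before the first segment
with torch.no_grad():
    for segment in segments:
        ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(segment))
        hidden_states, mems = model(torch.tensor([ids]), mems=mems)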
@@ -96,37 +96,37 @@ def transformerXLModel(*args, **kwargs):
def transformerXLLMHeadModel(*args, **kwargs):
"""
gpt2LMHeadModel is the OpenAI GPT-2 Transformer model with the
tied (pre-trained) language modeling head on top.
tied (pre-trained) language modeling head on top.
Example:
Example:
# Load the tokenizer
>>> import torch
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
>>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLLMHeadModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
>>> model.eval()
# Predict hidden states features for each layer
# We can re-use the memory cells in a subsequent call to attend a longer context
# We can re-use the memory cells in a subsequent call to attend a longer context
>>> with torch.no_grad():
predictions_1, mems_1 = model(tokens_tensor_1)
predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
>>> assert predicted_token == 'who'
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
>>> assert predicted_token == 'who'
"""
    model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
    return model
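And mirroring the GPT-2 sketch earlier, a hedged greedy-continuation loop that carries `mems` instead of `past` (the generated tokens will vary with the checkpoint):

import torch

tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT',
                           'transformerXLTokenizer', 'transfo-xl-wt103')
model = torch.hub.load('huggingface/pytorch-pretrained-BERT',
                       'transformerXLLMHeadModel', 'transfo-xl-wt103')
model.eval()

prompt = "Who was Jim Henson ? Jim Henson was a"
ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(prompt))
tokens, mems, generated = torch.tensor([ids]), None, []
with torch.no_grad():
    for _ in range(5):
        predictions, mems = model(tokens, mems=mems)
        next_id = torch.argmax(predictions[0, -1, :]).item()
        generated.append(next_id)
        tokens = torch.tensor([[next_id]])  # only the new token; `mems` holds the context
print(tokenizer.convert_ids_to_tokens(generated))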