"vscode:/vscode.git/clone" did not exist on "f63ff536adfee0505a64d02649ef4b55dde614bb"
Unverified Commit 7c59e32d authored by Matt's avatar Matt Committed by GitHub
Browse files

Merge pull request #2 from huggingface/master

Updating my fork to the latest version
parents b8e2a9c5 c304593d
from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import (
BertModel,
BertForNextSentencePrediction,
BertForMaskedLM,
BertForMultipleChoice,
BertForPreTraining,
BertForQuestionAnswering,
BertForSequenceClassification,
BertForTokenClassification,
)
# A lot of models share the same param doc. Use a decorator
# to save typing
bert_docstring = """
Params:
pretrained_model_name_or_path: either:
- a str with the name of a pre-trained model to load
. `bert-base-uncased`
. `bert-large-uncased`
. `bert-base-cased`
. `bert-large-cased`
. `bert-base-multilingual-uncased`
. `bert-base-multilingual-cased`
. `bert-base-chinese`
. `bert-base-german-cased`
. `bert-large-uncased-whole-word-masking`
. `bert-large-cased-whole-word-masking`
- a path or url to a pretrained model archive containing:
. `bert_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a BertForPreTraining
instance
- a path or url to a pretrained model archive containing:
. `bert_config.json` a configuration file for the model
. `model.chkpt` a TensorFlow checkpoint
from_tf: should we load the weights from a locally saved TensorFlow
checkpoint
cache_dir: an optional path to a folder in which the pre-trained models
will be cached.
state_dict: an optional state dictionary
(collections.OrderedDict object) to use instead of Google
pre-trained models
*inputs, **kwargs: additional input for the specific Bert class
(ex: num_labels for BertForSequenceClassification)
"""
def _append_from_pretrained_docstring(docstr):
def docstring_decorator(fn):
fn.__doc__ = fn.__doc__ + docstr
return fn
return docstring_decorator
def bertTokenizer(*args, **kwargs):
"""
Instantiate a BertTokenizer from a pre-trained/customized vocab file
Args:
pretrained_model_name_or_path: Path to pretrained model archive
or one of pre-trained vocab configs below.
* bert-base-uncased
* bert-large-uncased
* bert-base-cased
* bert-large-cased
* bert-base-multilingual-uncased
* bert-base-multilingual-cased
* bert-base-chinese
Keyword args:
cache_dir: an optional path to a specific directory to download and cache
the pre-trained model weights.
Default: None
do_lower_case: Whether to lower case the input.
Only has an effect when do_wordpiece_only=False
Default: True
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
Default: True
max_len: An artificial maximum length to truncate tokenized sequences to;
Effective maximum length is always the minimum of this
value (if specified) and the underlying BERT model's
sequence length.
Default: None
never_split: List of tokens which will never be split during tokenization.
Only has an effect when do_wordpiece_only=False
Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
Example:
>>> import torch
>>> sentence = 'Hello, World!'
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
>>> toks = tokenizer.tokenize(sentence)
['Hello', '##,', 'World', '##!']
>>> ids = tokenizer.convert_tokens_to_ids(toks)
[8667, 28136, 1291, 28125]
"""
tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)
return tokenizer
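# A hedged sketch of the remaining keyword arguments documented above
# (the values are illustrative only, not recommended defaults):
#   >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer',
#   ...                            'bert-base-uncased', do_lower_case=True, max_len=128)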
@_append_from_pretrained_docstring(bert_docstring)
def bertModel(*args, **kwargs):
"""
BertModel is the basic BERT Transformer model with a layer of summed token,
position and sequence embeddings followed by a series of identical
self-attention blocks (12 for BERT-base, 24 for BERT-large).
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertModel', 'bert-base-cased')
>>> model.eval()
# Predict hidden states features for each layer
>>> with torch.no_grad():
encoded_layers, _ = model(tokens_tensor, segments_tensors)
"""
model = BertModel.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForNextSentencePrediction(*args, **kwargs):
"""
BERT model with next sentence prediction head.
This module comprises the BERT model followed by the next sentence
classification head.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertForNextSentencePrediction
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForNextSentencePrediction', 'bert-base-cased')
>>> model.eval()
# Predict the next sentence classification logits
>>> with torch.no_grad():
next_sent_classif_logits = model(tokens_tensor, segments_tensors)
"""
model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForPreTraining(*args, **kwargs):
"""
BERT model with pre-training heads.
This module comprises the BERT model followed by the two pre-training heads
- the masked language modeling head, and
- the next sentence classification head.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertForPreTraining
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForPreTraining', 'bert-base-cased')
>>> model.eval()
# Predict the masked language modeling and next sentence classification logits
>>> with torch.no_grad():
        masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
"""
model = BertForPreTraining.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForMaskedLM(*args, **kwargs):
"""
BertForMaskedLM includes the BertModel Transformer followed by the
(possibly) pre-trained masked language modeling head.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> masked_index = 8
>>> tokenized_text[masked_index] = '[MASK]'
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertForMaskedLM
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForMaskedLM', 'bert-base-cased')
>>> model.eval()
# Predict all tokens
>>> with torch.no_grad():
predictions = model(tokens_tensor, segments_tensors)
>>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
'henson'
"""
model = BertForMaskedLM.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForSequenceClassification(*args, **kwargs):
"""
BertForSequenceClassification is a fine-tuning model that includes
BertModel and a sequence-level (sequence or pair of sequences) classifier
on top of the BertModel. Note that the classification head is only initialized
and has to be trained.
The sequence-level classifier is a linear layer that takes as input the
last hidden state of the first token (the `[CLS]` token) in the input sequence
(see Figures 3a and 3b in the BERT paper).
Args:
num_labels: the number (>=2) of classes for the classifier.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertForSequenceClassification
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
>>> model.eval()
# Predict the sequence classification logits
>>> with torch.no_grad():
seq_classif_logits = model(tokens_tensor, segments_tensors)
# Or get the sequence classification loss
>>> labels = torch.tensor([1])
>>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
"""
model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForMultipleChoice(*args, **kwargs):
"""
BertForMultipleChoice is a fine-tuning model that includes BertModel and a
linear layer on top of the BertModel. Note that the multiple choice head is
only initialized and has to be trained.
Args:
num_choices: the number (>=2) of choices to classify over.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
>>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
# Load bertForMultipleChoice
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
>>> model.eval()
# Predict the multiple choice logits
>>> with torch.no_grad():
multiple_choice_logits = model(tokens_tensor, segments_tensors)
# Or get the multiple choice loss
>>> labels = torch.tensor([1])
>>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
"""
model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForQuestionAnswering(*args, **kwargs):
"""
BertForQuestionAnswering is a fine-tuning model that includes BertModel
with token-level classifiers on top of the full sequence of last hidden
states. Note that the classification head is only initialized
and has to be trained.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertForQuestionAnswering
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForQuestionAnswering', 'bert-base-cased')
>>> model.eval()
# Predict the start and end positions logits
>>> with torch.no_grad():
start_logits, end_logits = model(tokens_tensor, segments_tensors)
# Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
>>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
# set model.train() before if training this loss
>>> total_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
"""
model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(bert_docstring)
def bertForTokenClassification(*args, **kwargs):
"""
BertForTokenClassification is a fine-tuning model that includes BertModel
and a token-level classifier on top of the BertModel. Note that the classification
head is only initialized and has to be trained.
The token-level classifier is a linear layer that takes as input the last
hidden state of the sequence.
Args:
num_labels: the number (>=2) of classes for the classifier.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertForTokenClassification
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
>>> model.eval()
# Predict the token classification logits
>>> with torch.no_grad():
classif_logits = model(tokens_tensor, segments_tensors)
# Or get the token classification loss
>>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
>>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
"""
model = BertForTokenClassification.from_pretrained(*args, **kwargs)
return model
from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer
from pytorch_pretrained_bert.modeling_gpt2 import (
GPT2Model,
GPT2LMHeadModel,
GPT2DoubleHeadsModel
)
# A lot of models share the same param doc. Use a decorator
# to save typing
gpt2_docstring = """
Params:
pretrained_model_name_or_path: either:
- a str with the name of a pre-trained model to load selected in the list of:
. `gpt2`, `gpt2-medium`
- a path or url to a pretrained model archive containing:
. `gpt2_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a GPT2Model instance
- a path or url to a pretrained model archive containing:
. `gpt2_config.json` a configuration file for the model
. a TensorFlow checkpoint with trained weights
from_tf: should we load the weights from a locally saved TensorFlow checkpoint
cache_dir: an optional path to a folder in which the pre-trained models will be cached.
state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
*inputs, **kwargs: additional input for the specific GPT-2 class
"""
def _append_from_pretrained_docstring(docstr):
def docstring_decorator(fn):
fn.__doc__ = fn.__doc__ + docstr
return fn
return docstring_decorator
def gpt2Tokenizer(*args, **kwargs):
"""
Instantiate a GPT-2 BPE tokenizer for OpenAI GPT-2 from a pre-trained/customized vocab file.
Peculiarities:
- Byte-level BPE
Args:
pretrained_model_name_or_path: Path to pretrained model archive
or one of pre-trained vocab configs below.
* gpt2
Keyword args:
special_tokens: Special tokens in vocabulary that are not pretrained ([SEP], [CLS]...)
Default: None
max_len: An artificial maximum length to truncate tokenized sequences to;
Effective maximum length is always the minimum of this
value (if specified) and the underlying model's
sequence length.
Default: None
Example:
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
>>> text = "Who was Jim Henson ?"
>>> indexed_tokens = tokenizer.encode(text)
"""
tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
return tokenizer
@_append_from_pretrained_docstring(gpt2_docstring)
def gpt2Model(*args, **kwargs):
"""
gpt2Model is the basic OpenAI GPT-2 Transformer model, a stack of identical
masked self-attention blocks pre-trained on a large-scale corpus with a
language modeling objective.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> indexed_tokens_1 = tokenizer.encode(text_1)
>>> indexed_tokens_2 = tokenizer.encode(text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2Model
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2')
>>> model.eval()
# Predict hidden states features for each layer
# past can be used to reuse precomputed hidden states in subsequent predictions
>>> with torch.no_grad():
hidden_states_1, past = model(tokens_tensor_1)
hidden_states_2, past = model(tokens_tensor_2, past=past)
"""
model = GPT2Model.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(gpt2_docstring)
def gpt2LMHeadModel(*args, **kwargs):
"""
gpt2LMHeadModel is the OpenAI GPT-2 Transformer model with the
tied (pre-trained) language modeling head on top.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> indexed_tokens_1 = tokenizer.encode(text_1)
>>> indexed_tokens_2 = tokenizer.encode(text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2LMHeadModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2LMHeadModel', 'gpt2')
>>> model.eval()
# Predict the language modeling logits
# past can be used to reuse precomputed hidden states in subsequent predictions
>>> with torch.no_grad():
predictions_1, past = model(tokens_tensor_1)
predictions_2, past = model(tokens_tensor_2, past=past)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.decode([predicted_index])
>>> assert predicted_token == ' who'
"""
model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(gpt2_docstring)
def gpt2DoubleHeadsModel(*args, **kwargs):
"""
gpt2DoubleHeadsModel is the OpenAI GPT-2 Transformer model with the
tied (pre-trained) language modeling head and a multiple choice
classification head (only initialized, not pre-trained).
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
>>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
>>> tokenized_text1 = tokenizer.tokenize(text1)
>>> tokenized_text2 = tokenizer.tokenize(text2)
>>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
>>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# Pad the shorter sequence so both choices can be stacked into one tensor (shape: 1 x 2 x seq_len)
>>> max_seq_len = max(len(indexed_tokens1), len(indexed_tokens2))
>>> indexed_tokens1 = indexed_tokens1 + [0] * (max_seq_len - len(indexed_tokens1))
>>> indexed_tokens2 = indexed_tokens2 + [0] * (max_seq_len - len(indexed_tokens2))
>>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
>>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load gpt2DoubleHeadsModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2')
>>> model.eval()
# Predict the language modeling and multiple choice logits
>>> with torch.no_grad():
lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
"""
model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
return model
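# Hedged training sketch continuing the docstring example above (the `mc_labels`
# keyword is assumed from the underlying GPT2DoubleHeadsModel.forward signature;
# the label value is illustrative):
#   >>> mc_labels = torch.tensor([0])   # index of the correct choice
#   >>> losses = model(tokens_tensor, mc_token_ids, mc_labels=mc_labels)  # set model.train() before if training this loss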
from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer
from pytorch_pretrained_bert.modeling_transfo_xl import (
TransfoXLModel,
TransfoXLLMHeadModel
)
# A lot of models share the same param doc. Use a decorator
# to save typing
transformer_xl_docstring = """
Transformer-XL uses relative positioning (with sinusoidal patterns) and adaptive softmax inputs, which means that:
- you don't need to specify positioning embedding indices, and
- the tokens in the vocabulary have to be sorted in decreasing order of frequency.
Params:
pretrained_model_name_or_path: either:
- a str with the name of a pre-trained model to load selected in the list of:
. `transfo-xl-wt103`
- a path or url to a pretrained model archive containing:
. `transfo_xl_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a TransfoXLModel instance
- a path or url to a pretrained model archive containing:
. `transfo_xl_config.json` a configuration file for the model
. `model.chkpt` a TensorFlow checkpoint
from_tf: should we load the weights from a locally saved TensorFlow checkpoint
cache_dir: an optional path to a folder in which the pre-trained models will be cached.
state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
*inputs, **kwargs: additional input for the specific TransformerXL class
"""
def _append_from_pretrained_docstring(docstr):
def docstring_decorator(fn):
fn.__doc__ = fn.__doc__ + docstr
return fn
return docstring_decorator
def transformerXLTokenizer(*args, **kwargs):
"""
Instantiate a Transformer-XL tokenizer adapted from Vocab class in https://github.com/kimiyoung/transformer-xl
Args:
pretrained_model_name_or_path: Path to pretrained model archive
or one of pre-trained vocab configs below.
* transfo-xl-wt103
Example:
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')
>>> text = "Who was Jim Henson ?"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
"""
tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
return tokenizer
@_append_from_pretrained_docstring(transformer_xl_docstring)
def transformerXLModel(*args, **kwargs):
"""
transformerXLModel is the basic Transformer XL model.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
>>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLModel', 'transfo-xl-wt103')
>>> model.eval()
# Predict hidden states features for each layer
# We can re-use the memory cells in a subsequent call to attend a longer context
>>> with torch.no_grad():
hidden_states_1, mems_1 = model(tokens_tensor_1)
hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
"""
model = TransfoXLModel.from_pretrained(*args, **kwargs)
return model
@_append_from_pretrained_docstring(transformer_xl_docstring)
def transformerXLLMHeadModel(*args, **kwargs):
"""
transformerXLLMHeadModel is the Transformer-XL model with the
tied (pre-trained) language modeling head on top.
Example:
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
>>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLLMHeadModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
>>> model.eval()
# Predict the language modeling logits
# We can re-use the memory cells in a subsequent call to attend a longer context
>>> with torch.no_grad():
predictions_1, mems_1 = model(tokens_tensor_1)
predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
>>> assert predicted_token == 'who'
"""
model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
return model
__version__ = "0.6.1" __version__ = "0.6.2"
from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus) from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
...@@ -15,7 +15,7 @@ from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel, ...@@ -15,7 +15,7 @@ from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel,
from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel, from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel,
load_tf_weights_in_transfo_xl) load_tf_weights_in_transfo_xl)
from .modeling_gpt2 import (GPT2Config, GPT2Model, from .modeling_gpt2 import (GPT2Config, GPT2Model,
GPT2LMHeadModel, GPT2DoubleHeadsModel, GPT2LMHeadModel, GPT2DoubleHeadsModel, GPT2MultipleChoiceHead,
load_tf_weights_in_gpt2) load_tf_weights_in_gpt2)
from .optimization import BertAdam from .optimization import BertAdam
......
@@ -22,6 +22,15 @@ import requests
 from botocore.exceptions import ClientError
 from tqdm import tqdm

+try:
+    from torch.hub import _get_torch_home
+    torch_cache_home = _get_torch_home()
+except ImportError:
+    torch_cache_home = os.path.expanduser(
+        os.getenv('TORCH_HOME', os.path.join(
+            os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')))
+default_cache_path = os.path.join(torch_cache_home, 'pytorch_pretrained_bert')
+
 try:
     from urllib.parse import urlparse
 except ImportError:
@@ -29,11 +38,11 @@ except ImportError:
 try:
     from pathlib import Path
-    PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                                   Path.home() / '.pytorch_pretrained_bert'))
+    PYTORCH_PRETRAINED_BERT_CACHE = Path(
+        os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path))
 except (AttributeError, ImportError):
     PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                              os.path.join(os.path.expanduser("~"), '.pytorch_pretrained_bert'))
+                                              default_cache_path)
 CONFIG_NAME = "config.json"
 WEIGHTS_NAME = "pytorch_model.bin"
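# Hedged usage note (an illustration, not part of the diff above): with this change
# the cache defaults to <torch cache home>/pytorch_pretrained_bert (typically
# ~/.cache/torch/pytorch_pretrained_bert) and can still be redirected by setting the
# environment variable before the library is imported, e.g.
#   import os
#   os.environ['PYTORCH_PRETRAINED_BERT_CACHE'] = '/data/bert_cache'  # hypothetical path
#   import pytorch_pretrained_bert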