"...git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "9611c2d0aae7a1a667a3eecaa92756fea1073f20"
Commit 4447f270 authored by thomwolf's avatar thomwolf
Browse files

updating hub

parent 33d3db5c
...@@ -309,6 +309,28 @@ predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] ...@@ -309,6 +309,28 @@ predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
assert predicted_token == '.</w>' assert predicted_token == '.</w>'
``` ```
And here is how to use `OpenAIGPTDoubleHeadsModel`:
```python
# Load pre-trained model (weights)
model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
model.eval()
# Prepare tokenized input
text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
tokenized_text1 = tokenizer.tokenize(text1)
tokenized_text2 = tokenizer.tokenize(text2)
indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Predict the language-modeling logits and the multiple-choice logits
with torch.no_grad():
lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
```
### Transformer-XL ### Transformer-XL
Here is a quick-start example using the `TransfoXLTokenizer`, `TransfoXLModel` and `TransfoXLLMHeadModel` classes with the Transformer-XL model pre-trained on WikiText-103. See the [doc section](#doc) below for all the details on these classes. Here is a quick-start example using the `TransfoXLTokenizer`, `TransfoXLModel` and `TransfoXLLMHeadModel` classes with the Transformer-XL model pre-trained on WikiText-103. See the [doc section](#doc) below for all the details on these classes.
...@@ -456,6 +478,29 @@ predicted_index = torch.argmax(predictions_2[0, -1, :]).item() ...@@ -456,6 +478,29 @@ predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.decode([predicted_index]) predicted_token = tokenizer.decode([predicted_index])
``` ```
And here is how to use `GPT2DoubleHeadsModel`:
```python
# Load pre-trained model (weights)
model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
model.eval()
# Prepare tokenized input
text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
tokenized_text1 = tokenizer.tokenize(text1)
tokenized_text2 = tokenizer.tokenize(text2)
indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Predict the language-modeling logits and the multiple-choice logits (plus the cached `past` states)
with torch.no_grad():
lm_logits, multiple_choice_logits, past = model(tokens_tensor, mc_token_ids)
```
## Doc ## Doc
Here is a detailed documentation of the classes in the package and how to use them: Here is a detailed documentation of the classes in the package and how to use them:
......
...@@ -23,6 +23,9 @@ bert_docstring = """ ...@@ -23,6 +23,9 @@ bert_docstring = """
. `bert-base-multilingual-uncased` . `bert-base-multilingual-uncased`
. `bert-base-multilingual-cased` . `bert-base-multilingual-cased`
. `bert-base-chinese` . `bert-base-chinese`
. `bert-base-german-cased`
. `bert-large-uncased-whole-word-masking`
. `bert-large-cased-whole-word-masking`
- a path or url to a pretrained model archive containing: - a path or url to a pretrained model archive containing:
. `bert_config.json` a configuration file for the model . `bert_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a BertForPreTraining . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining
...@@ -81,6 +84,7 @@ def bertTokenizer(*args, **kwargs): ...@@ -81,6 +84,7 @@ def bertTokenizer(*args, **kwargs):
Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"] Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
Example: Example:
>>> import torch
>>> sentence = 'Hello, World!' >>> sentence = 'Hello, World!'
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
>>> toks = tokenizer.tokenize(sentence) >>> toks = tokenizer.tokenize(sentence)
...@@ -101,6 +105,7 @@ def bertModel(*args, **kwargs): ...@@ -101,6 +105,7 @@ def bertModel(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -129,6 +134,7 @@ def bertForNextSentencePrediction(*args, **kwargs): ...@@ -129,6 +134,7 @@ def bertForNextSentencePrediction(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -158,6 +164,7 @@ def bertForPreTraining(*args, **kwargs): ...@@ -158,6 +164,7 @@ def bertForPreTraining(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -181,6 +188,7 @@ def bertForMaskedLM(*args, **kwargs): ...@@ -181,6 +188,7 @@ def bertForMaskedLM(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -222,6 +230,7 @@ def bertForSequenceClassification(*args, **kwargs): ...@@ -222,6 +230,7 @@ def bertForSequenceClassification(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -256,6 +265,7 @@ def bertForMultipleChoice(*args, **kwargs): ...@@ -256,6 +265,7 @@ def bertForMultipleChoice(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -288,6 +298,7 @@ def bertForQuestionAnswering(*args, **kwargs): ...@@ -288,6 +298,7 @@ def bertForQuestionAnswering(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
...@@ -326,6 +337,7 @@ def bertForTokenClassification(*args, **kwargs): ...@@ -326,6 +337,7 @@ def bertForTokenClassification(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
......
...@@ -11,7 +11,7 @@ gpt2_docstring = """ ...@@ -11,7 +11,7 @@ gpt2_docstring = """
Params: Params:
pretrained_model_name_or_path: either: pretrained_model_name_or_path: either:
- a str with the name of a pre-trained model to load selected in the list of: - a str with the name of a pre-trained model to load selected in the list of:
. `gpt2` . `gpt2`, `gpt2-medium`
- a path or url to a pretrained model archive containing: - a path or url to a pretrained model archive containing:
. `gpt2_config.json` a configuration file for the model . `gpt2_config.json` a configuration file for the model
. `pytorch_model.bin` a PyTorch dump of a GPT2Model instance . `pytorch_model.bin` a PyTorch dump of a GPT2Model instance
...@@ -147,10 +147,14 @@ def gpt2DoubleHeadsModel(*args, **kwargs): ...@@ -147,10 +147,14 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2') >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input # Prepare tokenized input
>>> text = "Who was Jim Henson ?" >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
>>> indexed_tokens = tokenizer.encode(text) >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
>>> tokens_tensor = torch.tensor([indexed_tokens]) >>> tokenized_text1 = tokenizer.tokenize(text1)
>>> mc_token_ids = torch.LongTensor([ [len(indexed_tokens)] ]) >>> tokenized_text2 = tokenizer.tokenize(text2)
>>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
>>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
>>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
>>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load gpt2DoubleHeadsModel # Load gpt2DoubleHeadsModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2') >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2')
......
...@@ -126,7 +126,7 @@ def openAIGPTLMHeadModel(*args, **kwargs): ...@@ -126,7 +126,7 @@ def openAIGPTLMHeadModel(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch >>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt') >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt')
# Prepare tokenized input # Prepare tokenized input
...@@ -161,15 +161,18 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs): ...@@ -161,15 +161,18 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
Example: Example:
# Load the tokenizer # Load the tokenizer
>>> import torch >>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt') >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt')
# Prepare tokenized input # Prepare tokenized input
>>> text = "Who was Jim Henson ? Jim Henson was a puppeteer" >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
>>> tokenized_text = tokenizer.tokenize(text) >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) >>> tokenized_text1 = tokenizer.tokenize(text1)
>>> tokens_tensor = torch.tensor([indexed_tokens]) >>> tokenized_text2 = tokenizer.tokenize(text2)
>>> mc_token_ids = torch.LongTensor([ [len(tokenized_text)] ]) >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
>>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
>>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
>>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load openAIGPTDoubleHeadsModel # Load openAIGPTDoubleHeadsModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTDoubleHeadsModel', 'openai-gpt') >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment