chenpangpang/transformers · Commits · 6e011690
"vscode:/vscode.git/clone" did not exist on "af4b98ed97ec9d10c22c45e033f8dd2c0da3b69e"
Commit 6e011690, authored Oct 27, 2019 by Lorenzo Ampil
Add special tokens to documentation for the rest of pytorch model examples #1561
Parent: 3a52b657
Showing 8 changed files with 25 additions and 25 deletions
transformers/modeling_ctrl.py        +2 -2
transformers/modeling_distilbert.py  +4 -4
transformers/modeling_gpt2.py        +2 -2
transformers/modeling_openai.py      +2 -2
transformers/modeling_roberta.py     +3 -3
transformers/modeling_transfo_xl.py  +2 -2
transformers/modeling_xlm.py         +5 -5
transformers/modeling_xlnet.py       +5 -5
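Every hunk below makes the same one-line change: each docstring example now passes add_special_tokens=True to tokenizer.encode, so the example input carries the model's special tokens. For tokenizers that define an insertion scheme (e.g. DistilBERT's [CLS]/[SEP] or RoBERTa's <s>/</s>), the flag wraps the sequence accordingly; for tokenizers that keep the default no-op insertion (GPT-2, CTRL, OpenAI GPT, Transformer-XL), it should change nothing but keeps the examples uniform. A minimal sketch of the effect, assuming the transformers 2.x API current at this commit:

# Minimal sketch of what the flag does (assumes the transformers 2.x API
# current at this commit, including token-id properties like cls_token_id).
import torch
from transformers import DistilBertTokenizer

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

plain = tokenizer.encode("Hello, my dog is cute", add_special_tokens=False)
wrapped = tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)

# For a BERT-style tokenizer the wrapped ids are [CLS] ... [SEP] around the plain ids.
assert wrapped[0] == tokenizer.cls_token_id
assert wrapped[-1] == tokenizer.sep_token_id
assert wrapped[1:-1] == plain

input_ids = torch.tensor(wrapped).unsqueeze(0)  # Batch size 1, as in the examples below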
transformers/modeling_ctrl.py
...
@@ -261,7 +261,7 @@ class CTRLModel(CTRLPreTrainedModel):
 tokenizer = CTRLTokenizer.from_pretrained('ctrl')
 model = CTRLModel.from_pretrained('ctrl')
-input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -438,7 +438,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
 tokenizer = CTRLTokenizer.from_pretrained('ctrl')
 model = CTRLLMHeadModel.from_pretrained('ctrl')
-input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=input_ids)
 loss, logits = outputs[:2]
...
transformers/modeling_distilbert.py
...
@@ -411,7 +411,7 @@ class DistilBertModel(DistilBertPreTrainedModel):
 tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
 model = DistilBertModel.from_pretrained('distilbert-base-uncased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -495,7 +495,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
 tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
 model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, masked_lm_labels=input_ids)
 loss, prediction_scores = outputs[:2]
...
@@ -569,7 +569,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
 tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
 model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=labels)
 loss, logits = outputs[:2]
...
@@ -643,7 +643,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
 tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
 model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 start_positions = torch.tensor([1])
 end_positions = torch.tensor([3])
 outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
...
transformers/modeling_gpt2.py
...
@@ -338,7 +338,7 @@ class GPT2Model(GPT2PreTrainedModel):
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 model = GPT2Model.from_pretrained('gpt2')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -503,7 +503,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 model = GPT2LMHeadModel.from_pretrained('gpt2')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=input_ids)
 loss, logits = outputs[:2]
...
transformers/modeling_openai.py
...
@@ -343,7 +343,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
 tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
 model = OpenAIGPTModel.from_pretrained('openai-gpt')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -478,7 +478,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
 tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
 model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=input_ids)
 loss, logits = outputs[:2]
...
transformers/modeling_roberta.py
...
@@ -154,7 +154,7 @@ class RobertaModel(BertModel):
 tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
 model = RobertaModel.from_pretrained('roberta-base')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -209,7 +209,7 @@ class RobertaForMaskedLM(BertPreTrainedModel):
 tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
 model = RobertaForMaskedLM.from_pretrained('roberta-base')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, masked_lm_labels=input_ids)
 loss, prediction_scores = outputs[:2]
...
@@ -303,7 +303,7 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
 tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
 model = RobertaForSequenceClassification.from_pretrained('roberta-base')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=labels)
 loss, logits = outputs[:2]
...
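Of the files touched here, RoBERTa is the one where the flag is most consequential: the model was pretrained with every sequence wrapped in <s> ... </s>, and RobertaModel in this release logs a warning when the leading <s> is absent. A hedged check of the wrapped ids:

# Hedged illustration (assumes the transformers 2.x RoBERTa tokenizer;
# in roberta-base, <s> is id 0 and </s> is id 2).
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
ids = tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)

assert ids[0] == tokenizer.bos_token_id   # <s>
assert ids[-1] == tokenizer.eos_token_id  # </s>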
transformers/modeling_transfo_xl.py
...
@@ -578,7 +578,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
 tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
 model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states, mems = outputs[:2]
...
@@ -808,7 +808,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
 tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
 model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 prediction_scores, mems = outputs[:2]
...
transformers/modeling_xlm.py
...
@@ -332,7 +332,7 @@ class XLMModel(XLMPreTrainedModel):
 tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
 model = XLMModel.from_pretrained('xlm-mlm-en-2048')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -607,7 +607,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
 tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
 model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -671,7 +671,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
 tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
 model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=labels)
 loss, logits = outputs[:2]
...
@@ -754,7 +754,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
 tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
 model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 start_positions = torch.tensor([1])
 end_positions = torch.tensor([3])
 outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
...
@@ -849,7 +849,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
 tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
 model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 start_positions = torch.tensor([1])
 end_positions = torch.tensor([3])
 outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
...
transformers/modeling_xlnet.py
...
@@ -584,7 +584,7 @@ class XLNetModel(XLNetPreTrainedModel):
 tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
 model = XLNetModel.from_pretrained('xlnet-large-cased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids)
 last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
...
@@ -900,7 +900,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
 tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
 model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
 # We show how to setup inputs to predict a next token using a bi-directional context.
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0)  # We will predict the masked token
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True)).unsqueeze(0)  # We will predict the masked token
 perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
 perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
 target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
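The docstring continues past this hunk (elided below). As a hedged sketch of how these inputs are consumed by this release's XLNetLMHeadModel: perm_mask hides the last position from every token, and target_mapping selects that position as the single prediction target.

# Hedged sketch of the elided continuation: run the model with the masks
# and read logits for the single target position.
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
next_token_logits = outputs[0]  # shape [1, 1, vocab_size]: one prediction, batch size 1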
...
@@ -983,7 +983,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
 tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
 model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
 outputs = model(input_ids, labels=labels)
 loss, logits = outputs[:2]
...
@@ -1163,7 +1163,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
 tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
 model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 start_positions = torch.tensor([1])
 end_positions = torch.tensor([3])
 outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
...
@@ -1276,7 +1276,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
 tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
 model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
-input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
 start_positions = torch.tensor([1])
 end_positions = torch.tensor([3])
 outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
...