Commit 7fba47b7 authored by thomwolf

WIP reordering

parent e25cba78
@@ -549,7 +549,7 @@ class GPT2Model(GPT2PreTrainedModel):
                 all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),)

             outputs = block(hidden_states,
-                            past=layer_past,
+                            layer_past=layer_past,
                             attention_mask=attention_mask,
                             head_mask=head_mask[i])
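
Note on the hunk above: only the keyword used when calling each block is renamed (`past=` becomes `layer_past=`); the model-level `past` argument keeps its name. A minimal caller-side sketch of the cached-decoding pattern this feeds, assuming the public `GPT2Model.forward` still accepts `past=` and returns the per-block `presents` as its second output (both per this version's docs):

    import torch
    from pytorch_transformers import GPT2Model, GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2Model.from_pretrained('gpt2')
    model.eval()

    input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])
    hidden_states, presents = model(input_ids)[:2]   # presents: one cached (key, value) per block
    next_ids = torch.tensor([tokenizer.encode(" today")])
    hidden_states, presents = model(next_ids, past=presents)[:2]  # reuse the cache for fast decoding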
@@ -666,7 +666,7 @@ the classification head takes as input the input of a specified classification token
 """, GPT2_START_DOCSTRING, GPT2_INPUTS_DOCSTRING)
 class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     r"""
-        **mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
+        **mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1[``.
         **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
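
A short sketch of what the new default documented above means in practice. The shapes follow the docstring; the explicit tensor shown is, on my reading, equivalent to omitting `mc_token_ids`:

    import torch

    batch_size, num_choices, seq_len = 1, 2, 7
    input_ids = torch.randint(0, 50257, (batch_size, num_choices, seq_len))
    # Explicit form of the documented default: the index of the last token.
    mc_token_ids = torch.full((batch_size, num_choices), seq_len - 1, dtype=torch.long)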
@@ -636,7 +636,7 @@ the classification head takes as input the input of a specified classification token
 """, OPENAI_GPT_START_DOCSTRING, OPENAI_GPT_INPUTS_DOCSTRING)
 class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     r"""
-        **mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
+        **mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1[``.
         **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
@@ -678,7 +678,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
         input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
         mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0)  # Batch size 1
-        outputs = model(input_ids, mc_token_ids)
+        outputs = model(input_ids, mc_token_ids=mc_token_ids)
         lm_prediction_scores, mc_prediction_scores = outputs[:2]
     """
@@ -700,7 +700,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
                                    self.transformer.tokens_embed)

     def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
-                lm_labels=None, mc_labels=None):
+                mc_token_ids=None, lm_labels=None, mc_labels=None):
         transformer_outputs = self.transformer(input_ids,
                                                attention_mask=attention_mask,
                                                token_type_ids=token_type_ids,
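
With `mc_token_ids` now an explicit parameter of `forward`, the double-heads model is best called with keywords so that labels cannot be mis-bound positionally. A hedged sketch reusing `input_ids` and `mc_token_ids` from the docstring example; `lm_labels` and `mc_labels` here are hypothetical tensors of the documented shapes:

    outputs = model(input_ids,
                    mc_token_ids=mc_token_ids,
                    lm_labels=lm_labels,    # hypothetical language-modeling labels
                    mc_labels=mc_labels)    # hypothetical multiple-choice labels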
@@ -123,8 +123,8 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                                      token_labels, choice_labels):
             model = RobertaModel(config=config)
             model.eval()
-            sequence_output, pooled_output = model(input_ids, token_type_ids, input_mask)
-            sequence_output, pooled_output = model(input_ids, token_type_ids)
+            sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
+            sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
             sequence_output, pooled_output = model(input_ids)
             result = {
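
The test change above matters because the reordering moves `attention_mask` ahead of `token_type_ids` in `forward`, so the old positional calls would now bind to the wrong parameter. A self-contained toy sketch (not library code) of the pitfall the keyword form avoids:

    def forward(input_ids, attention_mask=None, token_type_ids=None):
        return attention_mask, token_type_ids

    # Old-style positional call: token_type_ids silently lands in attention_mask.
    mask, types = forward("ids", "token_types")
    assert mask == "token_types" and types is None

    # Keyword call, as in the updated test: binding is explicit and order-proof.
    mask, types = forward("ids", token_type_ids="token_types")
    assert mask is None and types == "token_types"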
@@ -140,7 +140,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                                           token_labels, choice_labels):
             model = RobertaForMaskedLM(config=config)
             model.eval()
-            loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
+            loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels)
             result = {
                 "loss": loss,
                 "prediction_scores": prediction_scores,