"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "31dfde74290bbfbbb05a5919eb9424962aeabd99"
Unverified Commit 0b7d053c authored by Valentin's avatar Valentin Committed by GitHub
Browse files

Fix incorrect key names in the LayoutLMv2 / LayoutXLM tokenizers (#14493)

When padding_side is "left", a copy/paste error caused the padded "labels"
entry to be built from the "bbox" data instead of the existing "labels" data.
parent 204d2513
...@@ -1275,7 +1275,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): ...@@ -1275,7 +1275,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
if "bbox" in encoded_inputs: if "bbox" in encoded_inputs:
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"] encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
if "labels" in encoded_inputs: if "labels" in encoded_inputs:
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"] encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
if "special_tokens_mask" in encoded_inputs: if "special_tokens_mask" in encoded_inputs:
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"] encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
......
...@@ -746,7 +746,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): ...@@ -746,7 +746,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
if "bbox" in encoded_inputs: if "bbox" in encoded_inputs:
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"] encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
if "labels" in encoded_inputs: if "labels" in encoded_inputs:
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"] encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
if "special_tokens_mask" in encoded_inputs: if "special_tokens_mask" in encoded_inputs:
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"] encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
......
...@@ -1051,7 +1051,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): ...@@ -1051,7 +1051,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer):
if "bbox" in encoded_inputs: if "bbox" in encoded_inputs:
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"] encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
if "labels" in encoded_inputs: if "labels" in encoded_inputs:
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"] encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
if "special_tokens_mask" in encoded_inputs: if "special_tokens_mask" in encoded_inputs:
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"] encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
......
...@@ -614,7 +614,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): ...@@ -614,7 +614,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):
if "bbox" in encoded_inputs: if "bbox" in encoded_inputs:
encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"] encoded_inputs["bbox"] = [self.pad_token_box] * difference + encoded_inputs["bbox"]
if "labels" in encoded_inputs: if "labels" in encoded_inputs:
encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["bbox"] encoded_inputs["labels"] = [self.pad_token_label] * difference + encoded_inputs["labels"]
if "special_tokens_mask" in encoded_inputs: if "special_tokens_mask" in encoded_inputs:
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"] encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment