Unverified commit 416fdbad, authored by Yih-Dar and committed by GitHub
Browse files

Fix LayoutLMv2 init issue and doctest (#30278)



* fix

* try suggestion

* update

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent d179b9dc
...@@ -503,6 +503,9 @@ class LayoutLMv2PreTrainedModel(PreTrainedModel): ...@@ -503,6 +503,9 @@ class LayoutLMv2PreTrainedModel(PreTrainedModel):
elif isinstance(module, nn.LayerNorm): elif isinstance(module, nn.LayerNorm):
module.bias.data.zero_() module.bias.data.zero_()
module.weight.data.fill_(1.0) module.weight.data.fill_(1.0)
elif isinstance(module, LayoutLMv2Model):
if hasattr(module, "visual_segment_embedding"):
module.visual_segment_embedding.data.normal_(mean=0.0, std=self.config.initializer_range)
def my_convert_sync_batchnorm(module, process_group=None): def my_convert_sync_batchnorm(module, process_group=None):
...@@ -822,7 +825,7 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel): ...@@ -822,7 +825,7 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
>>> import torch >>> import torch
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> set_seed(88) >>> set_seed(0)
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased") >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased") >>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")
...@@ -993,7 +996,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel): ...@@ -993,7 +996,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
>>> import torch >>> import torch
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> set_seed(88) >>> set_seed(0)
>>> dataset = load_dataset("rvl_cdip", split="train", streaming=True) >>> dataset = load_dataset("rvl_cdip", split="train", streaming=True)
>>> data = next(iter(dataset)) >>> data = next(iter(dataset))
...@@ -1012,8 +1015,8 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel): ...@@ -1012,8 +1015,8 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
>>> loss, logits = outputs.loss, outputs.logits >>> loss, logits = outputs.loss, outputs.logits
>>> predicted_idx = logits.argmax(dim=-1).item() >>> predicted_idx = logits.argmax(dim=-1).item()
>>> predicted_answer = dataset.info.features["label"].names[4] >>> predicted_answer = dataset.info.features["label"].names[4]
>>> predicted_idx, predicted_answer >>> predicted_idx, predicted_answer # results are not good without further fine-tuning
(4, 'advertisement') (7, 'advertisement')
``` ```
""" """
...@@ -1172,7 +1175,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel): ...@@ -1172,7 +1175,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
>>> from PIL import Image >>> from PIL import Image
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> set_seed(88) >>> set_seed(0)
>>> datasets = load_dataset("nielsr/funsd", split="test") >>> datasets = load_dataset("nielsr/funsd", split="test")
>>> labels = datasets.features["ner_tags"].feature.names >>> labels = datasets.features["ner_tags"].feature.names
...@@ -1203,8 +1206,8 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel): ...@@ -1203,8 +1206,8 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
>>> predicted_token_class_ids = logits.argmax(-1) >>> predicted_token_class_ids = logits.argmax(-1)
>>> predicted_tokens_classes = [id2label[t.item()] for t in predicted_token_class_ids[0]] >>> predicted_tokens_classes = [id2label[t.item()] for t in predicted_token_class_ids[0]]
>>> predicted_tokens_classes[:5] >>> predicted_tokens_classes[:5] # results are not good without further fine-tuning
['B-ANSWER', 'B-HEADER', 'B-HEADER', 'B-HEADER', 'B-HEADER'] ['I-HEADER', 'I-HEADER', 'I-QUESTION', 'I-HEADER', 'I-QUESTION']
``` ```
""" """
...@@ -1314,7 +1317,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel): ...@@ -1314,7 +1317,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
>>> from PIL import Image >>> from PIL import Image
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> set_seed(88) >>> set_seed(0)
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased") >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased") >>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
...@@ -1328,12 +1331,12 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel): ...@@ -1328,12 +1331,12 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
>>> predicted_start_idx = outputs.start_logits.argmax(-1).item() >>> predicted_start_idx = outputs.start_logits.argmax(-1).item()
>>> predicted_end_idx = outputs.end_logits.argmax(-1).item() >>> predicted_end_idx = outputs.end_logits.argmax(-1).item()
>>> predicted_start_idx, predicted_end_idx >>> predicted_start_idx, predicted_end_idx
(154, 287) (30, 191)
>>> predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1] >>> predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1]
>>> predicted_answer = processor.tokenizer.decode(predicted_answer_tokens) >>> predicted_answer = processor.tokenizer.decode(predicted_answer_tokens)
>>> predicted_answer # results are not very good without further fine-tuning >>> predicted_answer # results are not good without further fine-tuning
'council mem - bers conducted by trrf treasurer philip g. kuehn to get answers which the public ... '44 a. m. to 12 : 25 p. m. 12 : 25 to 12 : 58 p. m. 12 : 58 to 4 : 00 p. m. 2 : 00 to 5 : 00 p. m. coffee break coffee will be served for men and women in the lobby adjacent to exhibit area. please move into exhibit area. ( exhibits open ) trrf general session ( part | ) presiding : lee a. waller trrf vice president “ introductory remarks ” lee a. waller, trrf vice presi - dent individual interviews with trrf public board members and sci - entific advisory council mem - bers conducted by trrf treasurer philip g. kuehn to get answers which the public refrigerated warehousing industry is looking for. plus questions from'
``` ```
```python ```python
...@@ -1343,7 +1346,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel): ...@@ -1343,7 +1346,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
>>> predicted_answer_span_start = outputs.start_logits.argmax(-1).item() >>> predicted_answer_span_start = outputs.start_logits.argmax(-1).item()
>>> predicted_answer_span_end = outputs.end_logits.argmax(-1).item() >>> predicted_answer_span_end = outputs.end_logits.argmax(-1).item()
>>> predicted_answer_span_start, predicted_answer_span_end >>> predicted_answer_span_start, predicted_answer_span_end
(154, 287) (30, 191)
``` ```
""" """
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment