Unverified Commit e02f95b2 authored by Nathan Glenn's avatar Nathan Glenn Committed by GitHub
Browse files

remove references to PDF reading via PIL (#15293)

* fix confusing PIL instructions

As stated in the documentation
[here](https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html?highlight=pdf#write-only-formats

),
PIL can only write PDF's, not read them. Remove references to reading
PDF's via PIL from this page to avoid confusion.

* mention PDF in doc examples using PIL
Co-authored-by: default avatarNielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Be explicit: PDFs must be converted to images

* fix formatting
Co-authored-by: default avatarNielsRogge <48327001+NielsRogge@users.noreply.github.com>
parent 3dc82427
...@@ -85,7 +85,9 @@ follows: ...@@ -85,7 +85,9 @@ follows:
```python ```python
from PIL import Image from PIL import Image
image = Image.open("name_of_your_document - can be a png file, pdf, etc.") image = Image.open(
"name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)."
)
width, height = image.size width, height = image.size
``` ```
...@@ -157,7 +159,9 @@ from PIL import Image ...@@ -157,7 +159,9 @@ from PIL import Image
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased") processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") image = Image.open(
"name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)."
).convert("RGB")
encoding = processor( encoding = processor(
image, return_tensors="pt" image, return_tensors="pt"
) # you can also add all tokenizer parameters here such as padding, truncation ) # you can also add all tokenizer parameters here such as padding, truncation
...@@ -177,7 +181,9 @@ from PIL import Image ...@@ -177,7 +181,9 @@ from PIL import Image
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") image = Image.open(
"name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)."
).convert("RGB")
words = ["hello", "world"] words = ["hello", "world"]
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
encoding = processor(image, words, boxes=boxes, return_tensors="pt") encoding = processor(image, words, boxes=boxes, return_tensors="pt")
...@@ -199,7 +205,9 @@ from PIL import Image ...@@ -199,7 +205,9 @@ from PIL import Image
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") image = Image.open(
"name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)."
).convert("RGB")
words = ["hello", "world"] words = ["hello", "world"]
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
word_labels = [1, 2] word_labels = [1, 2]
...@@ -219,7 +227,9 @@ from PIL import Image ...@@ -219,7 +227,9 @@ from PIL import Image
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased") processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") image = Image.open(
"name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)."
).convert("RGB")
question = "What's his name?" question = "What's his name?"
encoding = processor(image, question, return_tensors="pt") encoding = processor(image, question, return_tensors="pt")
print(encoding.keys()) print(encoding.keys())
...@@ -237,7 +247,9 @@ from PIL import Image ...@@ -237,7 +247,9 @@ from PIL import Image
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") image = Image.open(
"name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)."
).convert("RGB")
question = "What's his name?" question = "What's his name?"
words = ["hello", "world"] words = ["hello", "world"]
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment