test(unittest): Restore unit test cases

90cf1082 · myhloli · 8e981b3a · 90cf1082 · 90cf1082 · 90cf1082
Commit 90cf1082 authored Nov 18, 2024 by myhloli
20 changed files
--- a/tests/unittest/test_data/__init__.py
+++ b/tests/unittest/test_data/__init__.py
--- a/tests/unittest/test_data/assets/jsonl/test_01.jsonl
+++ b/tests/unittest/test_data/assets/jsonl/test_01.jsonl
+{"track_id":"e8824f5a-9fcb-4ee5-b2d4-6bf2c67019dc","path":"s3://sci-hub/enbook-scimag/78800000/libgen.scimag78872000-78872999/10.1017/cbo9780511770425.012.pdf","file_type":"pdf","content_type":"application/pdf","content_length":80078,"title":"German Idealism and the Concept of Punishment || Conclusion","remark":{"file_id":"scihub_78800000/libgen.scimag78872000-78872999.zip_10.1017/cbo9780511770425.012","file_source_type":"paper","original_file_id":"10.1017/cbo9780511770425.012","file_name":"10.1017/cbo9780511770425.012.pdf","author":"Merle, Jean-Christophe"}}
--- a/tests/unittest/test_data/assets/jsonl/test_02.jsonl
+++ b/tests/unittest/test_data/assets/jsonl/test_02.jsonl
+{"track_id":"e8824f5a-9fcb-4ee5-b2d4-6bf2c67019dc","path":"tests/unittest/test_data/assets/pdfs/test_02.pdf","file_type":"pdf","content_type":"application/pdf","content_length":80078,"title":"German Idealism and the Concept of Punishment || Conclusion","remark":{"file_id":"scihub_78800000/libgen.scimag78872000-78872999.zip_10.1017/cbo9780511770425.012","file_source_type":"paper","original_file_id":"10.1017/cbo9780511770425.012","file_name":"10.1017/cbo9780511770425.012.pdf","author":"Merle, Jean-Christophe"}}
--- a/tests/unittest/test_data/assets/pdfs/test_01.pdf
+++ b/tests/unittest/test_data/assets/pdfs/test_01.pdf
--- a/tests/unittest/test_data/assets/pdfs/test_02.pdf
+++ b/tests/unittest/test_data/assets/pdfs/test_02.pdf
--- a/tests/unittest/test_data/assets/pngs/test_01.png
+++ b/tests/unittest/test_data/assets/pngs/test_01.png
--- a/tests/unittest/test_data/assets/pngs/test_02.png
+++ b/tests/unittest/test_data/assets/pngs/test_02.png
--- a/tests/unittest/test_data/data_reader_writer/__init__.py
+++ b/tests/unittest/test_data/data_reader_writer/__init__.py
--- a/tests/unittest/test_data/data_reader_writer/test_filebase.py
+++ b/tests/unittest/test_data/data_reader_writer/test_filebase.py
+import os
+import shutil
+from magic_pdf.data.data_reader_writer import (FileBasedDataReader,
+                                               FileBasedDataWriter)
+def test_filebased_reader_writer():
+    """Round-trip bytes through FileBasedDataWriter and FileBasedDataReader.
+
+    Two addressing modes are checked: a file name relative to the directory
+    the reader/writer were constructed with, and an absolute path located
+    outside that directory.
+    """
+    # Local import keeps this block self-contained.
+    import tempfile
+    # A unique scratch directory avoids collisions between concurrent runs
+    # that a fixed path under /tmp would cause.
+    unitest_dir = tempfile.mkdtemp(prefix='magic_pdf_unittest_')
+    try:
+        sub_dir = os.path.join(unitest_dir, 'sub')
+        abs_fn = os.path.join(unitest_dir, 'abspath.txt')
+        os.makedirs(sub_dir, exist_ok=True)
+        writer = FileBasedDataWriter(sub_dir)
+        reader = FileBasedDataReader(sub_dir)
+        # Relative name: handled with respect to sub_dir.
+        writer.write('test.txt', b'hello world')
+        assert reader.read('test.txt') == b'hello world'
+        # Absolute name: lives in the parent of sub_dir.
+        writer.write(abs_fn, b'hello world')
+        assert reader.read(abs_fn) == b'hello world'
+    finally:
+        # Clean up even when an assertion above fails.
+        shutil.rmtree(unitest_dir, ignore_errors=True)
--- a/tests/unittest/test_data/data_reader_writer/test_multi_bucket_s3.py
+++ b/tests/unittest/test_data/data_reader_writer/test_multi_bucket_s3.py
+import json
+import os
+import fitz
+import pytest
+from magic_pdf.data.data_reader_writer import (MultiBucketS3DataReader,
+                                               MultiBucketS3DataWriter)
+from magic_pdf.data.schemas import S3Config
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in (
+            'S3_BUCKET',
+            'S3_ACCESS_KEY',
+            'S3_SECRET_KEY',
+            'S3_ENDPOINT',
+            'S3_BUCKET_2',
+            'S3_ACCESS_KEY_2',
+            'S3_SECRET_KEY_2',
+            'S3_ENDPOINT_2',
+        )
+    ),
+    reason='need s3 config!',
+)
+def test_multi_bucket_s3_reader_writer():
+    """Test MultiBucketS3DataReader/Writer with two configured buckets.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    export S3_BUCKET_2=xxx export S3_ACCESS_KEY_2=xxx export S3_SECRET_KEY_2=xxx export S3_ENDPOINT_2=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    bucket_2 = os.getenv('S3_BUCKET_2', '')
+    ak_2 = os.getenv('S3_ACCESS_KEY_2', '')
+    sk_2 = os.getenv('S3_SECRET_KEY_2', '')
+    endpoint_url_2 = os.getenv('S3_ENDPOINT_2', '')
+    s3configs = [
+        S3Config(
+            bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
+        ),
+        S3Config(
+            bucket_name=bucket_2,
+            access_key=ak_2,
+            secret_key=sk_2,
+            endpoint_url=endpoint_url_2,
+        ),
+    ]
+    reader = MultiBucketS3DataReader(bucket, s3configs)
+    writer = MultiBucketS3DataWriter(bucket, s3configs)
+    jsonl_key = 'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl'
+    # A bare key and the full s3:// URI of the same object must agree.
+    bits = reader.read(jsonl_key)
+    assert bits == reader.read(f's3://{bucket}/{jsonl_key}')
+    # An s3:// URI naming the second bucket is read through the same reader.
+    bits = reader.read(
+        f's3://{bucket_2}/enbook-scimag/78800000/libgen.scimag78872000-78872999/10.1017/cbo9780511770425.012.pdf'
+    )
+    docs = fitz.open('pdf', bits)
+    assert len(docs) == 10
+    # The '?bytes=566,713' suffix must behave like read_at(key, 566, 713).
+    bits = reader.read(f'{jsonl_key}?bytes=566,713')
+    assert bits == reader.read_at(jsonl_key, 566, 713)
+    assert len(json.loads(bits)) > 0
+    # Write a string and raw bytes, then read each back.
+    out_dir = 'unittest/data/data_reader_writer/multi_bucket_s3_data'
+    writer.write_string(f'{out_dir}/test01.txt', 'abc')
+    assert 'abc'.encode() == reader.read(f'{out_dir}/test01.txt')
+    writer.write(f'{out_dir}/test02.txt', '123'.encode())
+    assert '123'.encode() == reader.read(f'{out_dir}/test02.txt')
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in (
+            'S3_BUCKET',
+            'S3_ACCESS_KEY',
+            'S3_SECRET_KEY',
+            'S3_ENDPOINT',
+            'S3_BUCKET_2',
+            'S3_ACCESS_KEY_2',
+            'S3_SECRET_KEY_2',
+            'S3_ENDPOINT_2',
+        )
+    ),
+    reason='need s3 config!',
+)
+def test_multi_bucket_s3_reader_writer_with_prefix():
+    """Test MultiBucketS3DataReader/Writer built with a '<bucket>/<prefix>' default.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    export S3_BUCKET_2=xxx export S3_ACCESS_KEY_2=xxx export S3_SECRET_KEY_2=xxx export S3_ENDPOINT_2=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    bucket_2 = os.getenv('S3_BUCKET_2', '')
+    ak_2 = os.getenv('S3_ACCESS_KEY_2', '')
+    sk_2 = os.getenv('S3_SECRET_KEY_2', '')
+    endpoint_url_2 = os.getenv('S3_ENDPOINT_2', '')
+    s3configs = [
+        S3Config(
+            bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
+        ),
+        S3Config(
+            bucket_name=bucket_2,
+            access_key=ak_2,
+            secret_key=sk_2,
+            endpoint_url=endpoint_url_2,
+        ),
+    ]
+    prefix = 'meta-index'
+    reader = MultiBucketS3DataReader(f'{bucket}/{prefix}', s3configs)
+    writer = MultiBucketS3DataWriter(f'{bucket}/{prefix}', s3configs)
+    jsonl_key = 'scihub/v001/scihub/part-66210c190659-000026.jsonl'
+    # A prefix-relative key and the full s3:// URI (prefix included) must agree.
+    bits = reader.read(jsonl_key)
+    assert bits == reader.read(f's3://{bucket}/{prefix}/{jsonl_key}')
+    # An s3:// URI naming the second bucket is read through the same reader.
+    bits = reader.read(
+        f's3://{bucket_2}/enbook-scimag/78800000/libgen.scimag78872000-78872999/10.1017/cbo9780511770425.012.pdf'
+    )
+    docs = fitz.open('pdf', bits)
+    assert len(docs) == 10
+    # The '?bytes=566,713' suffix must behave like read_at(key, 566, 713).
+    bits = reader.read(f'{jsonl_key}?bytes=566,713')
+    assert bits == reader.read_at(jsonl_key, 566, 713)
+    assert len(json.loads(bits)) > 0
+    # Write via a prefix-relative key; read back relatively and via full URI.
+    out_dir = 'unittest/data/data_reader_writer/multi_bucket_s3_data'
+    writer.write_string(f'{out_dir}/test01.txt', 'abc')
+    assert 'abc'.encode() == reader.read(f'{out_dir}/test01.txt')
+    assert 'abc'.encode() == reader.read(
+        f's3://{bucket}/{prefix}/{out_dir}/test01.txt'
+    )
+    writer.write(f'{out_dir}/test02.txt', '123'.encode())
+    assert '123'.encode() == reader.read(f'{out_dir}/test02.txt')
--- a/tests/unittest/test_data/data_reader_writer/test_s3.py
+++ b/tests/unittest/test_data/data_reader_writer/test_s3.py
+import json
+import os
+import pytest
+from magic_pdf.data.data_reader_writer import S3DataReader, S3DataWriter
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in ('S3_BUCKET', 'S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_ENDPOINT')
+    ),
+    reason='need s3 config!',
+)
+def test_s3_reader_writer():
+    """Test S3DataReader/S3DataWriter constructed with an empty prefix.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    reader = S3DataReader('', bucket, ak, sk, endpoint_url)
+    writer = S3DataWriter('', bucket, ak, sk, endpoint_url)
+    jsonl_key = 'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl'
+    # A bare key and the full s3:// URI of the same object must agree.
+    bits = reader.read(jsonl_key)
+    assert bits == reader.read(f's3://{bucket}/{jsonl_key}')
+    # The '?bytes=566,713' suffix must behave like read_at(key, 566, 713).
+    bits = reader.read(f'{jsonl_key}?bytes=566,713')
+    assert bits == reader.read_at(jsonl_key, 566, 713)
+    assert len(json.loads(bits)) > 0
+    out_dir = 'unittest/data/data_reader_writer/multi_bucket_s3_data'
+    writer.write_string(f'{out_dir}/test01.txt', 'abc')
+    assert 'abc'.encode() == reader.read(f'{out_dir}/test01.txt')
+    # Write through the full s3:// URI so the relative read below checks the
+    # object that was just written. The original target '{bucket}/...' had
+    # no scheme, unlike every other bucket-qualified path in this test.
+    # NOTE(review): assumes the writer accepts s3:// URIs like the reader
+    # does -- confirm against MultiBucketS3DataWriter.write.
+    writer.write(f's3://{bucket}/{out_dir}/test02.txt', '123'.encode())
+    assert '123'.encode() == reader.read(f'{out_dir}/test02.txt')
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in ('S3_BUCKET', 'S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_ENDPOINT')
+    ),
+    reason='need s3 config!',
+)
+def test_s3_reader_writer_with_prefix():
+    """Test S3DataReader/S3DataWriter constructed with the 'meta-index' prefix.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    prefix = 'meta-index'
+    reader = S3DataReader(prefix, bucket, ak, sk, endpoint_url)
+    writer = S3DataWriter(prefix, bucket, ak, sk, endpoint_url)
+    jsonl_key = 'scihub/v001/scihub/part-66210c190659-000026.jsonl'
+    # A prefix-relative key and the full s3:// URI (prefix included) must agree.
+    bits = reader.read(jsonl_key)
+    assert bits == reader.read(f's3://{bucket}/{prefix}/{jsonl_key}')
+    # The '?bytes=566,713' suffix must behave like read_at(key, 566, 713).
+    bits = reader.read(f'{jsonl_key}?bytes=566,713')
+    assert bits == reader.read_at(jsonl_key, 566, 713)
+    assert len(json.loads(bits)) > 0
+    out_dir = 'unittest/data/data_reader_writer/multi_bucket_s3_data'
+    writer.write_string(f'{out_dir}/test01.txt', 'abc')
+    assert 'abc'.encode() == reader.read(f'{out_dir}/test01.txt')
+    assert 'abc'.encode() == reader.read(
+        f's3://{bucket}/{prefix}/{out_dir}/test01.txt'
+    )
+    # Write through the full s3:// URI so the relative read below checks the
+    # object that was just written. The original target '{bucket}/{prefix}/...'
+    # had no scheme, unlike every other bucket-qualified path in this test.
+    # NOTE(review): assumes the writer accepts s3:// URIs like the reader
+    # does -- confirm against MultiBucketS3DataWriter.write.
+    writer.write(f's3://{bucket}/{prefix}/{out_dir}/test02.txt', '123'.encode())
+    assert '123'.encode() == reader.read(f'{out_dir}/test02.txt')
--- a/tests/unittest/test_data/io/__init__.py
+++ b/tests/unittest/test_data/io/__init__.py
--- a/tests/unittest/test_data/io/test_s3.py
+++ b/tests/unittest/test_data/io/test_s3.py
+import json
+import os
+import pytest
+from magic_pdf.data.io.s3 import S3Reader, S3Writer
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in ('S3_BUCKET', 'S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_ENDPOINT')
+    ),
+    reason='s3 config not found',
+)
+def test_s3_reader():
+    """Test S3Reader.read and S3Reader.read_at against a known object.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    reader = S3Reader(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
+    jsonl_key = 'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl'
+    # Whole-object read returns some data.
+    bits = reader.read(jsonl_key)
+    assert len(bits) > 0
+    # The (566, 713) range must decode as a non-empty JSON document.
+    bits = reader.read_at(jsonl_key, 566, 713)
+    assert len(json.loads(bits)) > 0
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in ('S3_BUCKET', 'S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_ENDPOINT')
+    ),
+    reason='s3 config not found',
+)
+def test_s3_writer():
+    """Test S3Writer.write by reading the object back with S3Reader.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    writer = S3Writer(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
+    test_fn = 'unittest/io/test.jsonl'
+    writer.write(test_fn, '123'.encode())
+    # Read back through an independent reader built from the same config.
+    reader = S3Reader(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
+    bits = reader.read(test_fn)
+    assert bits.decode() == '123'
--- a/tests/unittest/test_data/test_dataset.py
+++ b/tests/unittest/test_data/test_dataset.py
+from magic_pdf.data.dataset import ImageDataset, PymuDocDataset
+def test_pymudataset():
+    """Build a PymuDocDataset from the sample PDF and check its first page.
+
+    The fixture is located relative to this test module, so the test does
+    not depend on the working directory pytest is started from.
+    """
+    # Local import keeps this block self-contained.
+    import os
+    pdf_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'assets', 'pdfs', 'test_01.pdf'
+    )
+    with open(pdf_path, 'rb') as f:
+        bits = f.read()
+    datasets = PymuDocDataset(bits)
+    assert len(datasets) > 0
+    assert datasets.get_page(0).get_page_info().h > 100
+def test_imagedataset():
+    """Build an ImageDataset from the sample PNG and check its single page.
+
+    The fixture is located relative to this test module, so the test does
+    not depend on the working directory pytest is started from.
+    """
+    # Local import keeps this block self-contained.
+    import os
+    png_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'assets', 'pngs', 'test_01.png'
+    )
+    with open(png_path, 'rb') as f:
+        bits = f.read()
+    datasets = ImageDataset(bits)
+    assert len(datasets) == 1
+    assert datasets.get_page(0).get_page_info().w > 100
--- a/tests/unittest/test_data/test_read_api.py
+++ b/tests/unittest/test_data/test_read_api.py
+import os
+import pytest
+from magic_pdf.data.data_reader_writer import MultiBucketS3DataReader
+from magic_pdf.data.read_api import (read_jsonl, read_local_images,
+                                     read_local_pdfs)
+from magic_pdf.data.schemas import S3Config
+def test_read_local_pdfs():
+    """Read every PDF in the bundled assets directory via read_local_pdfs.
+
+    The directory is located relative to this test module, so the test does
+    not depend on the working directory pytest is started from.
+    """
+    pdf_dir = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'assets', 'pdfs'
+    )
+    datasets = read_local_pdfs(pdf_dir)
+    # Two sample PDFs ship with the tests (test_01.pdf and test_02.pdf).
+    assert len(datasets) == 2
+    assert len(datasets[0]) > 0
+    assert len(datasets[1]) > 0
+    assert datasets[0].get_page(0).get_page_info().w > 0
+    assert datasets[0].get_page(0).get_page_info().h > 0
+def test_read_local_images():
+    """Read every PNG in the bundled assets directory via read_local_images.
+
+    The directory is located relative to this test module, so the test does
+    not depend on the working directory pytest is started from.
+    """
+    png_dir = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'assets', 'pngs'
+    )
+    datasets = read_local_images(png_dir, suffixes=['png'])
+    # Two sample images ship with the tests (test_01.png and test_02.png).
+    assert len(datasets) == 2
+    assert len(datasets[0]) == 1
+    assert len(datasets[1]) == 1
+    assert datasets[0].get_page(0).get_page_info().w > 0
+    assert datasets[0].get_page(0).get_page_info().h > 0
+@pytest.mark.skipif(
+    # Skip unless every variable the test reads is set and non-empty; an
+    # empty value would otherwise run the test with blank credentials.
+    not all(
+        os.getenv(name)
+        for name in (
+            'S3_BUCKET',
+            'S3_ACCESS_KEY',
+            'S3_SECRET_KEY',
+            'S3_ENDPOINT',
+            'S3_BUCKET_2',
+            'S3_ACCESS_KEY_2',
+            'S3_SECRET_KEY_2',
+            'S3_ENDPOINT_2',
+        )
+    ),
+    reason='need s3 config!',
+)
+def test_read_json():
+    """Test read_jsonl with an S3-hosted index and two local jsonl fixtures.
+
+    The s3 config must be provided through the environment:
+    export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx export S3_ENDPOINT=xxx
+    export S3_BUCKET_2=xxx export S3_ACCESS_KEY_2=xxx export S3_SECRET_KEY_2=xxx export S3_ENDPOINT_2=xxx
+    """
+    bucket = os.getenv('S3_BUCKET', '')
+    ak = os.getenv('S3_ACCESS_KEY', '')
+    sk = os.getenv('S3_SECRET_KEY', '')
+    endpoint_url = os.getenv('S3_ENDPOINT', '')
+    bucket_2 = os.getenv('S3_BUCKET_2', '')
+    ak_2 = os.getenv('S3_ACCESS_KEY_2', '')
+    sk_2 = os.getenv('S3_SECRET_KEY_2', '')
+    endpoint_url_2 = os.getenv('S3_ENDPOINT_2', '')
+    s3configs = [
+        S3Config(
+            bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
+        ),
+        S3Config(
+            bucket_name=bucket_2,
+            access_key=ak_2,
+            secret_key=sk_2,
+            endpoint_url=endpoint_url_2,
+        ),
+    ]
+    reader = MultiBucketS3DataReader(bucket, s3configs)
+    # Case 1: the jsonl index itself lives on S3.
+    datasets = read_jsonl(
+        f's3://{bucket}/meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl',
+        reader,
+    )
+    assert len(datasets) > 0
+    assert len(datasets[0]) == 10
+    # Local fixtures are located relative to this module so the test does
+    # not depend on the working directory pytest is started from.
+    jsonl_dir = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'assets', 'jsonl'
+    )
+    # Case 2: local jsonl whose record points at an s3:// object.
+    datasets = read_jsonl(os.path.join(jsonl_dir, 'test_01.jsonl'), reader)
+    assert len(datasets) == 1
+    assert len(datasets[0]) == 10
+    # Case 3: local jsonl whose record points at a local PDF; no reader given.
+    # NOTE(review): the 'path' inside test_02.jsonl is itself relative --
+    # presumably resolved against the working directory; verify.
+    datasets = read_jsonl(os.path.join(jsonl_dir, 'test_02.jsonl'))
+    assert len(datasets) == 1
+    assert len(datasets[0]) == 1
--- a/tests/unittest/test_integrations/test_rag/assets/middle.json
+++ b/tests/unittest/test_integrations/test_rag/assets/middle.json
+{
+    "pdf_info": [
+        {
+            "preproc_blocks": [
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        57,
+                        299,
+                        93
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                47,
+                                57,
+                                299,
+                                68
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        57,
+                                        298,
+                                        68
+                                    ],
+                                    "score": 0.98,
+                                    "content": "of the synthetic stereo scene from a single camera perspective",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                71,
+                                299,
+                                80
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        71,
+                                        299,
+                                        80
+                                    ],
+                                    "score": 0.96,
+                                    "content": "along with the ground truth disparity,occlusion map,and",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                82,
+                                123,
+                                93
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        82,
+                                        123,
+                                        93
+                                    ],
+                                    "score": 0.99,
+                                    "content": "discontinuitymap.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        47,
+                        100,
+                        301,
+                        535
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                51,
+                                100,
+                                292,
+                                484
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        51,
+                                        100,
+                                        292,
+                                        484
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                51,
+                                                100,
+                                                292,
+                                                484
+                                            ],
+                                            "score": 0.9999815225601196,
+                                            "type": "image",
+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                488,
+                                301,
+                                535
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        490,
+                                        299,
+                                        499
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                49,
+                                                490,
+                                                299,
+                                                499
+                                            ],
+                                            "score": 1.0,
+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        501,
+                                        300,
+                                        512
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                501,
+                                                300,
+                                                512
+                                            ],
+                                            "score": 1.0,
+                                            "content": "quence (1st row), along with their corresponding ground truth",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        513,
+                                        299,
+                                        523
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                513,
+                                                299,
+                                                523
+                                            ],
+                                            "score": 0.98,
+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        525,
+                                        110,
+                                        535
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                525,
+                                                110,
+                                                535
+                                            ],
+                                            "score": 0.99,
+                                            "content": "map (4th row).",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        549,
+                        299,
+                        678
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                549,
+                                299,
+                                558
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        58,
+                                        549,
+                                        298,
+                                        558
+                                    ],
+                                    "score": 0.98,
+                                    "content": "Theresultsof temporalstereomatching aregiveninFigure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                561,
+                                299,
+                                570
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        47,
+                                        561,
+                                        298,
+                                        570
+                                    ],
+                                    "score": 0.98,
+                                    "content": "3foruniformadditivenoiseconfinedtotherangesof±O",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                573,
+                                299,
+                                582
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        573,
+                                        299,
+                                        582
+                                    ],
+                                    "score": 0.96,
+                                    "content": "±20, and ±40. Each performance plot is given as a function",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                585,
+                                299,
+                                594
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        585,
+                                        299,
+                                        594
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of the feedback coefficient X. As with the majority of temporal",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                597,
+                                299,
+                                606
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        597,
+                                        299,
+                                        606
+                                    ],
+                                    "score": 0.99,
+                                    "content": "stereomatching methods,improvements are negligible when",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                609,
+                                299,
+                                618
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        609,
+                                        299,
+                                        618
+                                    ],
+                                    "score": 0.97,
+                                    "content": "no noise is added to the images [1o], [19]. This is largely due",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                621,
+                                299,
+                                629
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        621,
+                                        299,
+                                        629
+                                    ],
+                                    "score": 1.0,
+                                    "content": "tothefactthatthevideousedtoevaluatethesemethodsis",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                633,
+                                299,
+                                641
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        633,
+                                        299,
+                                        641
+                                    ],
+                                    "score": 1.0,
+                                    "content": "computergeneratedwithverylittlenoisetostartwith,thus",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                644,
+                                299,
+                                654
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        644,
+                                        299,
+                                        654
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the noise suppression achieved with temporal stereo matching",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                657,
+                                299,
+                                666
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        657,
+                                        299,
+                                        666
+                                    ],
+                                    "score": 0.98,
+                                    "content": "showslittletonoimprovementovermethodsthatoperate on",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                669,
+                                113,
+                                678
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        669,
+                                        113,
+                                        678
+                                    ],
+                                    "score": 1.0,
+                                    "content": "pairsofimages.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        680,
+                        299,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                680,
+                                299,
+                                690
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        59,
+                                        680,
+                                        298,
+                                        690
+                                    ],
+                                    "score": 0.97,
+                                    "content": "Significantimprovementsin accuracy canbeseenin Figure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                692,
+                                299,
+                                701
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        692,
+                                        298,
+                                        701
+                                    ],
+                                    "score": 0.97,
+                                    "content": "3 when the noise has ranges of ±20, and ±40.In this scenario",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                703,
+                                299,
+                                714
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        703,
+                                        299,
+                                        714
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the effect of noise in the current frame is reduced by increasing",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                716,
+                                299,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        716,
+                                        299,
+                                        725
+                                    ],
+                                    "score": 0.96,
+                                    "content": "thefeedbackcoefficientX.Thisincreasing ofXhas theeffect",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        55,
+                        564,
+                        371
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                55,
+                                538,
+                                305
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        55,
+                                        538,
+                                        305
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                55,
+                                                538,
+                                                305
+                                            ],
+                                            "score": 0.9999905824661255,
+                                            "type": "image",
+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                311,
+                                564,
+                                371
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        313,
+                                        562,
+                                        322
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                313,
+                                                562,
+                                                322
+                                            ],
+                                            "score": 0.97,
+                                            "content": "Figure 3: Performance of temporal matching at different levels",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        325,
+                                        561,
+                                        334
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                325,
+                                                561,
+                                                334
+                                            ],
+                                            "score": 0.98,
+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        336,
+                                        563,
+                                        347
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                336,
+                                                563,
+                                                347
+                                            ],
+                                            "score": 0.99,
+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        348,
+                                        561,
+                                        358
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                348,
+                                                561,
+                                                358
+                                            ],
+                                            "score": 0.96,
+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        360,
+                                        535,
+                                        371
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                360,
+                                                535,
+                                                371
+                                            ],
+                                            "score": 0.96,
+                                            "content": "values of MSE obtained without temporal aggregation.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        418,
+                        563,
+                        666
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                418,
+                                549,
+                                623
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        418,
+                                        549,
+                                        623
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                418,
+                                                549,
+                                                623
+                                            ],
+                                            "score": 0.9999067783355713,
+                                            "type": "image",
+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                630,
+                                563,
+                                666
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        631,
+                                        562,
+                                        641
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                631,
+                                                562,
+                                                641
+                                            ],
+                                            "score": 0.94,
+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        644,
+                                        561,
+                                        652
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                644,
+                                                561,
+                                                652
+                                            ],
+                                            "score": 0.97,
+                                            "content": "responding to the smallest mean squared error (MSE)of the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        655,
+                                        513,
+                                        665
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                655,
+                                                513,
+                                                665
+                                            ],
+                                            "score": 0.97,
+                                            "content": "disparity estimates for a range of noise strengths.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        311,
+                        692,
+                        563,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                311,
+                                692,
+                                563,
+                                702
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        692,
+                                        562,
+                                        702
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of averaging out noise in the per-pixel costs by selecting",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                704,
+                                563,
+                                713
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        704,
+                                        562,
+                                        713
+                                    ],
+                                    "score": 0.98,
+                                    "content": "matches based more heavily upon the auxiliary cost, which",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                716,
+                                563,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        716,
+                                        563,
+                                        725
+                                    ],
+                                    "score": 0.97,
+                                    "content": "is essentially a much more stable running average of the cost",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "layout_bboxes": [
+                {
+                    "layout_bbox": [
+                        47,
+                        55,
+                        301,
+                        726
+                    ],
+                    "layout_label": "V",
+                    "sub_layout": []
+                },
+                {
+                    "layout_bbox": [
+                        310,
+                        55,
+                        564,
+                        726
+                    ],
+                    "layout_label": "V",
+                    "sub_layout": []
+                }
+            ],
+            "page_idx": 0,
+            "page_size": [
+                612.0,
+                792.0
+            ],
+            "_layout_tree": [
+                {
+                    "layout_bbox": [
+                        0,
+                        55,
+                        612.0,
+                        726
+                    ],
+                    "layout_label": "V",
+                    "sub_layout": [
+                        {
+                            "layout_bbox": [
+                                47,
+                                55,
+                                564,
+                                726
+                            ],
+                            "layout_label": "H",
+                            "sub_layout": [
+                                {
+                                    "layout_bbox": [
+                                        47,
+                                        55,
+                                        301,
+                                        726
+                                    ],
+                                    "layout_label": "V",
+                                    "sub_layout": []
+                                },
+                                {
+                                    "layout_bbox": [
+                                        310,
+                                        55,
+                                        564,
+                                        726
+                                    ],
+                                    "layout_label": "V",
+                                    "sub_layout": []
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "images": [
+                {
+                    "type": "image",
+                    "bbox": [
+                        47,
+                        100,
+                        301,
+                        535
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                51,
+                                100,
+                                292,
+                                484
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        51,
+                                        100,
+                                        292,
+                                        484
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                51,
+                                                100,
+                                                292,
+                                                484
+                                            ],
+                                            "score": 0.9999815225601196,
+                                            "type": "image",
+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                488,
+                                301,
+                                535
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        490,
+                                        299,
+                                        499
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                49,
+                                                490,
+                                                299,
+                                                499
+                                            ],
+                                            "score": 1.0,
+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        501,
+                                        300,
+                                        512
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                501,
+                                                300,
+                                                512
+                                            ],
+                                            "score": 1.0,
+                                            "content": "quence (1st row), along with their corresponding ground truth",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        513,
+                                        299,
+                                        523
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                513,
+                                                299,
+                                                523
+                                            ],
+                                            "score": 0.98,
+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        525,
+                                        110,
+                                        535
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                525,
+                                                110,
+                                                535
+                                            ],
+                                            "score": 0.99,
+                                            "content": "map (4th row).",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        55,
+                        564,
+                        371
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                55,
+                                538,
+                                305
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        55,
+                                        538,
+                                        305
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                55,
+                                                538,
+                                                305
+                                            ],
+                                            "score": 0.9999905824661255,
+                                            "type": "image",
+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                311,
+                                564,
+                                371
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        313,
+                                        562,
+                                        322
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                313,
+                                                562,
+                                                322
+                                            ],
+                                            "score": 0.97,
+                                            "content": "Figure 3: Performance of temporal matching at different levels",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        325,
+                                        561,
+                                        334
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                325,
+                                                561,
+                                                334
+                                            ],
+                                            "score": 0.98,
+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        336,
+                                        563,
+                                        347
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                336,
+                                                563,
+                                                347
+                                            ],
+                                            "score": 0.99,
+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        348,
+                                        561,
+                                        358
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                348,
+                                                561,
+                                                358
+                                            ],
+                                            "score": 0.96,
+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        360,
+                                        535,
+                                        371
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                360,
+                                                535,
+                                                371
+                                            ],
+                                            "score": 0.96,
+                                            "content": "values of MSE obtained without temporal aggregation.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        418,
+                        563,
+                        666
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                418,
+                                549,
+                                623
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        418,
+                                        549,
+                                        623
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                418,
+                                                549,
+                                                623
+                                            ],
+                                            "score": 0.9999067783355713,
+                                            "type": "image",
+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                630,
+                                563,
+                                666
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        631,
+                                        562,
+                                        641
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                631,
+                                                562,
+                                                641
+                                            ],
+                                            "score": 0.94,
+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        644,
+                                        561,
+                                        652
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                644,
+                                                561,
+                                                652
+                                            ],
+                                            "score": 0.97,
+                                            "content": "responding to the smallest mean squared error (MSE)of the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        655,
+                                        513,
+                                        665
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                655,
+                                                513,
+                                                665
+                                            ],
+                                            "score": 0.97,
+                                            "content": "disparity estimates for a range of noise strengths.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ],
+            "tables": [],
+            "interline_equations": [],
+            "discarded_blocks": [],
+            "need_drop": false,
+            "drop_reason": [],
+            "para_blocks": [
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        57,
+                        299,
+                        93
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                47,
+                                57,
+                                299,
+                                68
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        57,
+                                        298,
+                                        68
+                                    ],
+                                    "score": 0.98,
+                                    "content": "of the synthetic stereo scene from a single camera perspective",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                71,
+                                299,
+                                80
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        71,
+                                        299,
+                                        80
+                                    ],
+                                    "score": 0.96,
+                                    "content": "along with the ground truth disparity,occlusion map,and",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                82,
+                                123,
+                                93
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        82,
+                                        123,
+                                        93
+                                    ],
+                                    "score": 0.99,
+                                    "content": "discontinuitymap.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        47,
+                        100,
+                        301,
+                        535
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                51,
+                                100,
+                                292,
+                                484
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        51,
+                                        100,
+                                        292,
+                                        484
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                51,
+                                                100,
+                                                292,
+                                                484
+                                            ],
+                                            "score": 0.9999815225601196,
+                                            "type": "image",
+                                            "image_path": "b07d74524eac6f46b5505b48b1e10db23f2b45cb2d21d5fec72e967e61255811.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                488,
+                                301,
+                                535
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        490,
+                                        299,
+                                        499
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                49,
+                                                490,
+                                                299,
+                                                499
+                                            ],
+                                            "score": 1.0,
+                                            "content": "Figure2:Twosampleframesfromthesyntheticvideose-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        501,
+                                        300,
+                                        512
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                501,
+                                                300,
+                                                512
+                                            ],
+                                            "score": 1.0,
+                                            "content": "quence (1st row), along with their corresponding ground truth",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        513,
+                                        299,
+                                        523
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                513,
+                                                299,
+                                                523
+                                            ],
+                                            "score": 0.98,
+                                            "content": "disparity (2nd row), occlusion map (3rd row), and discontinuity",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        48,
+                                        525,
+                                        110,
+                                        535
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                48,
+                                                525,
+                                                110,
+                                                535
+                                            ],
+                                            "score": 0.99,
+                                            "content": "map (4th row).",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        549,
+                        299,
+                        678
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                549,
+                                299,
+                                558
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        58,
+                                        549,
+                                        298,
+                                        558
+                                    ],
+                                    "score": 0.98,
+                                    "content": "Theresultsof temporalstereomatching aregiveninFigure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                561,
+                                299,
+                                570
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        47,
+                                        561,
+                                        298,
+                                        570
+                                    ],
+                                    "score": 0.98,
+                                    "content": "3foruniformadditivenoiseconfinedtotherangesof±O",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                573,
+                                299,
+                                582
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        573,
+                                        299,
+                                        582
+                                    ],
+                                    "score": 0.96,
+                                    "content": "±20, and ±40. Each performance plot is given as a function",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                585,
+                                299,
+                                594
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        585,
+                                        299,
+                                        594
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of the feedback coefficient X. As with the majority of temporal",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                597,
+                                299,
+                                606
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        49,
+                                        597,
+                                        299,
+                                        606
+                                    ],
+                                    "score": 0.99,
+                                    "content": "stereomatching methods,improvements are negligible when",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                609,
+                                299,
+                                618
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        609,
+                                        299,
+                                        618
+                                    ],
+                                    "score": 0.97,
+                                    "content": "no noise is added to the images [1o], [19]. This is largely due",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                621,
+                                299,
+                                629
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        621,
+                                        299,
+                                        629
+                                    ],
+                                    "score": 1.0,
+                                    "content": "tothefactthatthevideousedtoevaluatethesemethodsis",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                633,
+                                299,
+                                641
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        633,
+                                        299,
+                                        641
+                                    ],
+                                    "score": 1.0,
+                                    "content": "computergeneratedwithverylittlenoisetostartwith,thus",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                644,
+                                299,
+                                654
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        644,
+                                        299,
+                                        654
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the noise suppression achieved with temporal stereo matching",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                657,
+                                299,
+                                666
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        657,
+                                        299,
+                                        666
+                                    ],
+                                    "score": 0.98,
+                                    "content": "showslittletonoimprovementovermethodsthatoperate on",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                669,
+                                113,
+                                678
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        669,
+                                        113,
+                                        678
+                                    ],
+                                    "score": 1.0,
+                                    "content": "pairsofimages.",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        47,
+                        680,
+                        299,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                58,
+                                680,
+                                299,
+                                690
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        59,
+                                        680,
+                                        298,
+                                        690
+                                    ],
+                                    "score": 0.97,
+                                    "content": "Significantimprovementsin accuracy canbeseenin Figure",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                692,
+                                299,
+                                701
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        692,
+                                        298,
+                                        701
+                                    ],
+                                    "score": 0.97,
+                                    "content": "3 when the noise has ranges of ±20, and ±40.In this scenario",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                703,
+                                299,
+                                714
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        703,
+                                        299,
+                                        714
+                                    ],
+                                    "score": 0.98,
+                                    "content": "the effect of noise in the current frame is reduced by increasing",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                47,
+                                716,
+                                299,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        48,
+                                        716,
+                                        299,
+                                        725
+                                    ],
+                                    "score": 0.96,
+                                    "content": "thefeedbackcoefficientX.Thisincreasing ofXhas theeffect",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        55,
+                        564,
+                        371
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                55,
+                                538,
+                                305
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        55,
+                                        538,
+                                        305
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                55,
+                                                538,
+                                                305
+                                            ],
+                                            "score": 0.9999905824661255,
+                                            "type": "image",
+                                            "image_path": "c7539af438972442d0f86aa46409e6684338ddfd1fbfd6bdacf02220853ccb55.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                311,
+                                564,
+                                371
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        313,
+                                        562,
+                                        322
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                313,
+                                                562,
+                                                322
+                                            ],
+                                            "score": 0.97,
+                                            "content": "Figure 3: Performance of temporal matching at different levels",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        325,
+                                        561,
+                                        334
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                325,
+                                                561,
+                                                334
+                                            ],
+                                            "score": 0.98,
+                                            "content": "of uniformly distributed image noise{±0,±20,±40}.Mean",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        336,
+                                        563,
+                                        347
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                336,
+                                                563,
+                                                347
+                                            ],
+                                            "score": 0.99,
+                                            "content": "squared error (MSE) of disparities is plotted versus the values",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        348,
+                                        561,
+                                        358
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                348,
+                                                561,
+                                                358
+                                            ],
+                                            "score": 0.96,
+                                            "content": "of the feedback coefficient X. Dashed lines correspond to the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        311,
+                                        360,
+                                        535,
+                                        371
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                311,
+                                                360,
+                                                535,
+                                                371
+                                            ],
+                                            "score": 0.96,
+                                            "content": "values of MSE obtained without temporal aggregation.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "image",
+                    "bbox": [
+                        310,
+                        418,
+                        563,
+                        666
+                    ],
+                    "blocks": [
+                        {
+                            "bbox": [
+                                314,
+                                418,
+                                549,
+                                623
+                            ],
+                            "type": "image_body",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        314,
+                                        418,
+                                        549,
+                                        623
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                314,
+                                                418,
+                                                549,
+                                                623
+                                            ],
+                                            "score": 0.9999067783355713,
+                                            "type": "image",
+                                            "image_path": "9ac4db9197801de4a20dbc9ea17bc0c53afb7290dc8b5b45d9e92e830566cb14.jpg"
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                310,
+                                630,
+                                563,
+                                666
+                            ],
+                            "type": "image_caption",
+                            "lines": [
+                                {
+                                    "bbox": [
+                                        312,
+                                        631,
+                                        562,
+                                        641
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                631,
+                                                562,
+                                                641
+                                            ],
+                                            "score": 0.94,
+                                            "content": "Figure 4:Optimal values of the feedback coefficient \\ cor-",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        644,
+                                        561,
+                                        652
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                644,
+                                                561,
+                                                652
+                                            ],
+                                            "score": 0.97,
+                                            "content": "responding to the smallest mean squared error (MSE)of the",
+                                            "type": "text"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "bbox": [
+                                        312,
+                                        655,
+                                        513,
+                                        665
+                                    ],
+                                    "spans": [
+                                        {
+                                            "bbox": [
+                                                312,
+                                                655,
+                                                513,
+                                                665
+                                            ],
+                                            "score": 0.97,
+                                            "content": "disparity estimates for a range of noise strengths.",
+                                            "type": "text"
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "type": "text",
+                    "bbox": [
+                        311,
+                        692,
+                        563,
+                        725
+                    ],
+                    "lines": [
+                        {
+                            "bbox": [
+                                311,
+                                692,
+                                563,
+                                702
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        692,
+                                        562,
+                                        702
+                                    ],
+                                    "score": 0.95,
+                                    "content": "of averaging out noise in the per-pixel costs by selecting",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                704,
+                                563,
+                                713
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        704,
+                                        562,
+                                        713
+                                    ],
+                                    "score": 0.98,
+                                    "content": "matches based more heavily upon the auxiliary cost, which",
+                                    "type": "text"
+                                }
+                            ]
+                        },
+                        {
+                            "bbox": [
+                                311,
+                                716,
+                                563,
+                                725
+                            ],
+                            "spans": [
+                                {
+                                    "bbox": [
+                                        311,
+                                        716,
+                                        563,
+                                        725
+                                    ],
+                                    "score": 0.97,
+                                    "content": "is essentially a much more stable running average of the cost",
+                                    "type": "text"
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+    ],
+    "_parse_type": "ocr",
+    "_version_name": "0.7.0b1"
+}
--- a/tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf
+++ b/tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.2.pdf
--- a/tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.pdf
+++ b/tests/unittest/test_integrations/test_rag/assets/one_page_with_table_image.pdf
--- a/tests/unittest/test_integrations/test_rag/test_api.py
+++ b/tests/unittest/test_integrations/test_rag/test_api.py
+import json
+import os
+import shutil
+import tempfile
+from magic_pdf.integrations.rag.api import DataReader, RagDocumentReader
+from magic_pdf.integrations.rag.type import CategoryType
+from magic_pdf.integrations.rag.utils import \
+    convert_middle_json_to_layout_elements
+def test_rag_document_reader():
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+    # test
+    with open('tests/test_integrations/test_rag/assets/middle.json') as f:
+        json_data = json.load(f)
+    res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
+    doc = RagDocumentReader(res)
+    assert len(list(iter(doc))) == 1
+    page = list(iter(doc))[0]
+    assert len(list(iter(page))) == 10
+    assert len(page.get_rel_map()) == 3
+    item = list(iter(page))[0]
+    assert item.category_type == CategoryType.text
+    # teardown
+    shutil.rmtree(temp_output_dir)
+def test_data_reader():
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+    # test
+    data_reader = DataReader('tests/test_integrations/test_rag/assets', 'ocr',
+                             temp_output_dir)
+    assert data_reader.get_documents_count() == 2
+    for idx in range(data_reader.get_documents_count()):
+        document = data_reader.get_document_result(idx)
+        assert document is not None
+    # teardown
+    shutil.rmtree(temp_output_dir)
--- a/tests/unittest/test_integrations/test_rag/test_utils.py
+++ b/tests/unittest/test_integrations/test_rag/test_utils.py
+import json
+import os
+import shutil
+import tempfile
+from magic_pdf.integrations.rag.type import CategoryType
+from magic_pdf.integrations.rag.utils import (
+    convert_middle_json_to_layout_elements, inference)
+def test_convert_middle_json_to_layout_elements():
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+    # test
+    with open('tests/test_integrations/test_rag/assets/middle.json') as f:
+        json_data = json.load(f)
+    res = convert_middle_json_to_layout_elements(json_data, temp_output_dir)
+    assert len(res) == 1
+    assert len(res[0].layout_dets) == 10
+    assert res[0].layout_dets[0].anno_id == 0
+    assert res[0].layout_dets[0].category_type == CategoryType.text
+    assert len(res[0].extra.element_relation) == 3
+    # teardown
+    shutil.rmtree(temp_output_dir)
+def test_inference():
+    asset_dir = 'tests/test_integrations/test_rag/assets'
+    # setup
+    unitest_dir = '/tmp/magic_pdf/unittest/integrations/rag'
+    os.makedirs(unitest_dir, exist_ok=True)
+    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)
+    os.makedirs(temp_output_dir, exist_ok=True)
+    # test
+    res = inference(
+        asset_dir + '/one_page_with_table_image.pdf',
+        temp_output_dir,
+        'ocr',
+    )
+    assert res is not None
+    assert len(res) == 1
+    assert len(res[0].layout_dets) == 10
+    assert res[0].layout_dets[0].anno_id == 0
+    assert res[0].layout_dets[0].category_type == CategoryType.text
+    assert len(res[0].extra.element_relation) == 3
+    # teardown
+    shutil.rmtree(temp_output_dir)