test: Delete previous unit tests and add new end-to-end test.

ec1ba2c0 · Sidney233 · 343eaac1 · 343eaac1 · 343eaac1 · 343eaac1
Commit ec1ba2c0 authored Jul 14, 2025 by Sidney233
8 changed files
--- a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl
+++ b/tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl
-{"file_location":"tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf","doc_layout_result":[{"layout_dets":[{"category_id":1,"poly":[882.4013061523438,169.93817138671875,1552.350341796875,169.93817138671875,1552.350341796875,625.8263549804688,882.4013061523438,625.8263549804688],"score":0.999992311000824},{"category_id":1,"poly":[882.474853515625,1450.92822265625,1551.4490966796875,1450.92822265625,1551.4490966796875,1877.5712890625,882.474853515625,1877.5712890625],"score":0.9999903440475464},{"category_id":1,"poly":[881.6513061523438,626.2058715820312,1552.1400146484375,626.2058715820312,1552.1400146484375,1450.604736328125,881.6513061523438,1450.604736328125],"score":0.9999856352806091},{"category_id":1,"poly":[149.41075134277344,232.1595001220703,819.0465087890625,232.1595001220703,819.0465087890625,625.8865356445312,149.41075134277344,625.8865356445312],"score":0.99998539686203},{"category_id":1,"poly":[149.3945770263672,1215.5172119140625,817.8850708007812,1215.5172119140625,817.8850708007812,1304.873291015625,149.3945770263672,1304.873291015625],"score":0.9999765157699585},{"category_id":1,"poly":[882.6979370117188,1880.13916015625,1552.15185546875,1880.13916015625,1552.15185546875,2031.339599609375,882.6979370117188,2031.339599609375],"score":0.9999744892120361},{"category_id":1,"poly":[148.96054077148438,743.3055419921875,818.6231689453125,743.3055419921875,818.6231689453125,1074.2369384765625,148.96054077148438,1074.2369384765625],"score":0.9999669790267944},{"category_id":1,"poly":[148.8435516357422,1791.14306640625,818.6885375976562,1791.14306640625,818.6885375976562,2030.794189453125,148.8435516357422,2030.794189453125],"score":0.9999618530273438},{"category_id":0,"poly":[150.7009735107422,684.0087890625,623.5106201171875,684.0087890625,623.5106201171875,717.03662109375,150.7009735107422,717.03662109375],"score":0.9999415278434753},{"category_id":8,"poly":[146.48068237304688,1331.6737060546875,317.2640075683594,1331.6737060546875,317.2640075683594,1400.1722412109375,146.48068237304688,1400.1722412109375],"score":0.9998958110809326},{"category_id":1,"poly":[149.42420959472656,1430.8782958984375,818.9042358398438,1430.8782958984375,818.9042358398438,1672.7386474609375,149.42420959472656,1672.7386474609375],"score":0.9998599290847778},{"category_id":1,"poly":[149.18746948242188,172.10252380371094,818.5662231445312,172.10252380371094,818.5662231445312,230.4594268798828,149.18746948242188,230.4594268798828],"score":0.9997718334197998},{"category_id":0,"poly":[149.0175018310547,1732.1090087890625,702.1005859375,1732.1090087890625,702.1005859375,1763.6046142578125,149.0175018310547,1763.6046142578125],"score":0.9997085928916931},{"category_id":2,"poly":[1519.802490234375,98.59099578857422,1551.985107421875,98.59099578857422,1551.985107421875,119.48420715332031,1519.802490234375,119.48420715332031],"score":0.9995552897453308},{"category_id":8,"poly":[146.9109649658203,1100.156494140625,544.2803344726562,1100.156494140625,544.2803344726562,1184.929443359375,146.9109649658203,1184.929443359375],"score":0.9995207786560059},{"category_id":2,"poly":[148.11611938476562,99.87767791748047,318.926025390625,99.87767791748047,318.926025390625,120.70393371582031,148.11611938476562,120.70393371582031],"score":0.999351441860199},{"category_id":9,"poly":[791.7642211914062,1130.056396484375,818.6940307617188,1130.056396484375,818.6940307617188,1161.1080322265625,791.7642211914062,1161.1080322265625],"score":0.9908884763717651},{"category_id":9,"poly":[788.37060546875,1346.8450927734375,818.5010986328125,1346.8450927734375,818.5010986328125,1377.370361328125,788.37060546875,1377.370361328125],"score":0.9873985052108765},{"category_id":14,"poly":[146,1103,543,1103,543,1184,146,1184],"score":0.94,"latex":"E\\!\\left(W\\right)\\!=\\!\\frac{E\\!\\left[H^{2}\\right]}{2E\\!\\left[H\\right]}\\!=\\!\\frac{E\\!\\left[H\\right]}{2}\\!\\!\\left(1\\!+\\!\\operatorname{CV}\\!\\left(H\\right)^{2}\\right)"},{"category_id":13,"poly":[1196,354,1278,354,1278,384,1196,384],"score":0.91,"latex":"p(1-q)"},{"category_id":13,"poly":[881,415,1020,415,1020,444,881,444],"score":0.91,"latex":"(1-p)(1-q)"},{"category_id":14,"poly":[147,1333,318,1333,318,1400,147,1400],"score":0.91,"latex":"\\mathrm{CV}\\big(H\\big)\\!=\\!\\frac{\\sigma_{_H}}{E\\big[H\\big]}"},{"category_id":13,"poly":[1197,657,1263,657,1263,686,1197,686],"score":0.9,"latex":"(1-p)"},{"category_id":13,"poly":[213,1217,263,1217,263,1244,213,1244],"score":0.88,"latex":"E[X]"},{"category_id":13,"poly":[214,1434,245,1434,245,1459,214,1459],"score":0.87,"latex":"\\upsigma_{H}"},{"category_id":13,"poly":[324,2002,373,2002,373,2028,324,2028],"score":0.84,"latex":"30\\%"},{"category_id":13,"poly":[1209,693,1225,693,1225,717,1209,717],"score":0.83,"latex":"p"},{"category_id":13,"poly":[990,449,1007,449,1007,474,990,474],"score":0.81,"latex":"p"},{"category_id":13,"poly":[346,1277,369,1277,369,1301,346,1301],"score":0.81,"latex":"H"},{"category_id":13,"poly":[1137,661,1154,661,1154,686,1137,686],"score":0.81,"latex":"p"},{"category_id":13,"poly":[522,1432,579,1432,579,1459,522,1459],"score":0.81,"latex":"H\\left(4\\right)"},{"category_id":13,"poly":[944,540,962,540,962,565,944,565],"score":0.8,"latex":"p"},{"category_id":13,"poly":[1444,936,1461,936,1461,961,1444,961],"score":0.79,"latex":"p"},{"category_id":13,"poly":[602,1247,624,1247,624,1270,602,1270],"score":0.78,"latex":"H"},{"category_id":13,"poly":[147,1247,167,1247,167,1271,147,1271],"score":0.77,"latex":"X"},{"category_id":13,"poly":[210,1246,282,1246,282,1274,210,1274],"score":0.77,"latex":"\\mathrm{CV}(H)"},{"category_id":13,"poly":[1346,268,1361,268,1361,292,1346,292],"score":0.76,"latex":"q"},{"category_id":13,"poly":[215,957,238,957,238,981,215,981],"score":0.74,"latex":"H"},{"category_id":13,"poly":[149,956,173,956,173,981,149,981],"score":0.63,"latex":"W"},{"category_id":13,"poly":[924,841,1016,841,1016,868,924,868],"score":0.56,"latex":"8{\\mathrm{:}}00\\;\\mathrm{a.m}"},{"category_id":13,"poly":[956,871,1032,871,1032,898,956,898],"score":0.43,"latex":"20\\ \\mathrm{min}"},{"category_id":13,"poly":[1082,781,1112,781,1112,808,1082,808],"score":0.41,"latex":"(I)"},{"category_id":13,"poly":[697,1821,734,1821,734,1847,697,1847],"score":0.3,"latex":"1\\,\\mathrm{~h~}"},{"category_id":15,"poly":[881.0,174.0,1552.0,174.0,1552.0,204.0,881.0,204.0],"score":1.0,"text":"model. They also found that the empirical distributions of passenger"},{"category_id":15,"poly":[880.0,205.0,1552.0,205.0,1552.0,236.0,880.0,236.0],"score":0.99,"text":"incidence times (by time of day) had peaks just before the respec-"},{"category_id":15,"poly":[880.0,234.0,1553.0,234.0,1553.0,264.0,880.0,264.0],"score":0.99,"text":"tive average bus departure times. They hypothesized the existence"},{"category_id":15,"poly":[881.0,264.0,1345.0,264.0,1345.0,296.0,881.0,296.0],"score":0.98,"text":"of three classes of passengers: with proportion"},{"category_id":15,"poly":[1362.0,264.0,1552.0,264.0,1552.0,296.0,1362.0,296.0],"score":0.95,"text":"passengers whose"},{"category_id":15,"poly":[880.0,295.0,1552.0,295.0,1552.0,325.0,880.0,325.0],"score":1.0,"text":"time of incidence is causally coincident with that of a bus departure"},{"category_id":15,"poly":[880.0,326.0,1555.0,326.0,1555.0,355.0,880.0,355.0],"score":0.99,"text":"(e.g., because they saw the approaching bus from their home or a"},{"category_id":15,"poly":[881.0,356.0,1195.0,356.0,1195.0,388.0,881.0,388.0],"score":0.99,"text":"shop window); with proportion"},{"category_id":15,"poly":[1279.0,356.0,1553.0,356.0,1553.0,388.0,1279.0,388.0],"score":0.99,"text":", passengers who time their"},{"category_id":15,"poly":[882.0,388.0,1552.0,388.0,1552.0,416.0,882.0,416.0],"score":0.99,"text":"arrivals to minimize expected waiting time; and with proportion"},{"category_id":15,"poly":[1021.0,418.0,1553.0,418.0,1553.0,447.0,1021.0,447.0],"score":1.0,"text":", passengers who are randomly incident. The authors"},{"category_id":15,"poly":[881.0,448.0,989.0,448.0,989.0,477.0,881.0,477.0],"score":1.0,"text":"found that"},{"category_id":15,"poly":[1008.0,448.0,1553.0,448.0,1553.0,477.0,1008.0,477.0],"score":1.0,"text":"was positively correlated with the potential reduction"},{"category_id":15,"poly":[880.0,479.0,1552.0,479.0,1552.0,507.0,880.0,507.0],"score":1.0,"text":"in waiting time (compared with arriving randomly) that resulted"},{"category_id":15,"poly":[882.0,510.0,1551.0,510.0,1551.0,536.0,882.0,536.0],"score":0.97,"text":"from knowledge of the timetable and of service reliability. They also"},{"category_id":15,"poly":[881.0,539.0,943.0,539.0,943.0,568.0,881.0,568.0],"score":1.0,"text":"found"},{"category_id":15,"poly":[963.0,539.0,1553.0,539.0,1553.0,568.0,963.0,568.0],"score":0.99,"text":"to be higher in the peak commuting periods rather than in"},{"category_id":15,"poly":[881.0,568.0,1554.0,568.0,1554.0,599.0,881.0,599.0],"score":0.98,"text":"the off-peak periods, indicating more awareness of the timetable or"},{"category_id":15,"poly":[881.0,599.0,1323.0,599.0,1323.0,627.0,881.0,627.0],"score":0.98,"text":"historical reliability, or both, by commuters."},{"category_id":15,"poly":[905.0,1452.0,1551.0,1452.0,1551.0,1483.0,905.0,1483.0],"score":0.99,"text":"Furth and Muller study the issue in a theoretical context and gener-"},{"category_id":15,"poly":[883.0,1485.0,1553.0,1485.0,1553.0,1514.0,883.0,1514.0],"score":1.0,"text":"ally agree with the above findings (2). They are primarily concerned"},{"category_id":15,"poly":[882.0,1513.0,1553.0,1513.0,1553.0,1545.0,882.0,1545.0],"score":0.99,"text":"with the use of data from automatic vehicle-tracking systems to assess"},{"category_id":15,"poly":[880.0,1545.0,1553.0,1545.0,1553.0,1574.0,880.0,1574.0],"score":0.99,"text":"the impacts of reliability on passenger incidence behavior and wait-"},{"category_id":15,"poly":[881.0,1577.0,1551.0,1577.0,1551.0,1606.0,881.0,1606.0],"score":0.98,"text":"ing times. They propose that passengers will react to unreliability by"},{"category_id":15,"poly":[883.0,1608.0,1551.0,1608.0,1551.0,1637.0,883.0,1637.0],"score":1.0,"text":"departing earlier than they would with reliable services. Randomly"},{"category_id":15,"poly":[880.0,1636.0,1554.0,1636.0,1554.0,1669.0,880.0,1669.0],"score":1.0,"text":"incident unaware passengers will experience unreliability as a more"},{"category_id":15,"poly":[882.0,1669.0,1553.0,1669.0,1553.0,1697.0,882.0,1697.0],"score":0.99,"text":"dispersed distribution of headways and simply allocate additional"},{"category_id":15,"poly":[880.0,1699.0,1551.0,1699.0,1551.0,1726.0,880.0,1726.0],"score":0.97,"text":"time to their trip plan to improve the chance of arriving at their des-"},{"category_id":15,"poly":[881.0,1730.0,1551.0,1730.0,1551.0,1759.0,881.0,1759.0],"score":0.98,"text":"tination on time. Aware passengers, whose incidence is not entirely"},{"category_id":15,"poly":[880.0,1760.0,1552.0,1760.0,1552.0,1789.0,880.0,1789.0],"score":0.99,"text":"random, will react by timing their incidence somewhat earlier than"},{"category_id":15,"poly":[882.0,1792.0,1550.0,1792.0,1550.0,1818.0,882.0,1818.0],"score":0.99,"text":"the scheduled departure time to increase their chance of catching the"},{"category_id":15,"poly":[883.0,1823.0,1552.0,1823.0,1552.0,1849.0,883.0,1849.0],"score":0.99,"text":"desired service. The authors characterize these reactions as the costs"},{"category_id":15,"poly":[883.0,1853.0,1031.0,1853.0,1031.0,1880.0,883.0,1880.0],"score":0.95,"text":"of unreliability."},{"category_id":15,"poly":[907.0,630.0,1553.0,630.0,1553.0,658.0,907.0,658.0],"score":1.0,"text":"Bowman and Turnquist built on the concept of aware and unaware"},{"category_id":15,"poly":[881.0,662.0,1136.0,662.0,1136.0,690.0,881.0,690.0],"score":0.99,"text":"passengers of proportions"},{"category_id":15,"poly":[1155.0,662.0,1196.0,662.0,1196.0,690.0,1155.0,690.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1264.0,662.0,1553.0,662.0,1553.0,690.0,1264.0,690.0],"score":0.99,"text":",respectively. They proposed"},{"category_id":15,"poly":[881.0,692.0,1208.0,692.0,1208.0,719.0,881.0,719.0],"score":0.99,"text":"a utility-based model to estimate"},{"category_id":15,"poly":[1226.0,692.0,1552.0,692.0,1552.0,719.0,1226.0,719.0],"score":1.0,"text":"and the distribution of incidence"},{"category_id":15,"poly":[880.0,721.0,1554.0,721.0,1554.0,751.0,880.0,751.0],"score":0.99,"text":"times, and thus the mean waiting time, of aware passengers over"},{"category_id":15,"poly":[880.0,752.0,1553.0,752.0,1553.0,780.0,880.0,780.0],"score":0.98,"text":"a given headway as a function of the headway and reliability of"},{"category_id":15,"poly":[880.0,782.0,1081.0,782.0,1081.0,812.0,880.0,812.0],"score":0.99,"text":"bus departure times"},{"category_id":15,"poly":[1113.0,782.0,1552.0,782.0,1552.0,812.0,1113.0,812.0],"score":0.99,"text":". They observed seven bus stops in Chicago,"},{"category_id":15,"poly":[882.0,813.0,1553.0,813.0,1553.0,841.0,882.0,841.0],"score":0.98,"text":"Illinois, each served by a single (different) bus route, between 6:00"},{"category_id":15,"poly":[882.0,844.0,923.0,844.0,923.0,871.0,882.0,871.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1017.0,844.0,1550.0,844.0,1550.0,871.0,1017.0,871.0],"score":0.97,"text":".for 5 to 10 days each. The bus routes had headways"},{"category_id":15,"poly":[882.0,874.0,955.0,874.0,955.0,902.0,882.0,902.0],"score":0.95,"text":"of 5to"},{"category_id":15,"poly":[1033.0,874.0,1553.0,874.0,1553.0,902.0,1033.0,902.0],"score":0.98,"text":"and a range of reliabilities. The authors found that"},{"category_id":15,"poly":[882.0,906.0,1553.0,906.0,1553.0,933.0,882.0,933.0],"score":0.99,"text":"actual average waiting time was substantially less than predicted"},{"category_id":15,"poly":[881.0,935.0,1443.0,935.0,1443.0,963.0,881.0,963.0],"score":1.0,"text":"by the random incidence model. They estimated that"},{"category_id":15,"poly":[1462.0,935.0,1553.0,935.0,1553.0,963.0,1462.0,963.0],"score":0.96,"text":"was not"},{"category_id":15,"poly":[881.0,966.0,1552.0,966.0,1552.0,994.0,881.0,994.0],"score":0.98,"text":"statistically significantly different from 1.0, which they explain by"},{"category_id":15,"poly":[880.0,994.0,1552.0,994.0,1552.0,1025.0,880.0,1025.0],"score":0.99,"text":"the fact that all observations were taken during peak commuting"},{"category_id":15,"poly":[880.0,1027.0,1552.0,1027.0,1552.0,1054.0,880.0,1054.0],"score":0.99,"text":"times. Their model predicts that the longer the headway and the"},{"category_id":15,"poly":[881.0,1058.0,1554.0,1058.0,1554.0,1086.0,881.0,1086.0],"score":0.99,"text":"more reliable the departures, the more peaked the distribution of"},{"category_id":15,"poly":[881.0,1088.0,1553.0,1088.0,1553.0,1115.0,881.0,1115.0],"score":0.98,"text":"incidence times will be and the closer that peak will be to the next"},{"category_id":15,"poly":[882.0,1119.0,1552.0,1119.0,1552.0,1148.0,882.0,1148.0],"score":1.0,"text":"scheduled departure time. This prediction demonstrates what they"},{"category_id":15,"poly":[882.0,1149.0,1552.0,1149.0,1552.0,1176.0,882.0,1176.0],"score":0.99,"text":"refer to as a safety margin that passengers add to reduce the chance"},{"category_id":15,"poly":[883.0,1181.0,1552.0,1181.0,1552.0,1206.0,883.0,1206.0],"score":0.98,"text":"of missing their bus when the service is known to be somewhat"},{"category_id":15,"poly":[882.0,1210.0,1551.0,1210.0,1551.0,1238.0,882.0,1238.0],"score":0.98,"text":"unreliable. Such a safety margin can also result from unreliability in"},{"category_id":15,"poly":[881.0,1242.0,1553.0,1242.0,1553.0,1269.0,881.0,1269.0],"score":0.99,"text":"passengers' journeys to the public transport stop or station. Bowman"},{"category_id":15,"poly":[882.0,1271.0,1553.0,1271.0,1553.0,1299.0,882.0,1299.0],"score":0.99,"text":"and Turnquist conclude from their model that the random incidence"},{"category_id":15,"poly":[880.0,1301.0,1551.0,1301.0,1551.0,1331.0,880.0,1331.0],"score":0.99,"text":"model underestimates the waiting time benefits of improving reli-"},{"category_id":15,"poly":[882.0,1332.0,1552.0,1332.0,1552.0,1362.0,882.0,1362.0],"score":0.99,"text":"ability and overestimates the waiting time benefits of increasing ser-"},{"category_id":15,"poly":[883.0,1363.0,1552.0,1363.0,1552.0,1392.0,883.0,1392.0],"score":0.99,"text":"vice frequency. This is because as reliability increases passengers"},{"category_id":15,"poly":[882.0,1394.0,1552.0,1394.0,1552.0,1422.0,882.0,1422.0],"score":0.99,"text":"can better predict departure times and so can time their incidence to"},{"category_id":15,"poly":[882.0,1423.0,1159.0,1423.0,1159.0,1452.0,882.0,1452.0],"score":0.99,"text":"decrease their waiting time."},{"category_id":15,"poly":[175.0,235.0,819.0,235.0,819.0,264.0,175.0,264.0],"score":0.99,"text":"After briefly introducing the random incidence model, which is"},{"category_id":15,"poly":[149.0,265.0,818.0,265.0,818.0,295.0,149.0,295.0],"score":0.98,"text":"often assumed to hold at short headways, the balance of this section"},{"category_id":15,"poly":[148.0,298.0,818.0,298.0,818.0,324.0,148.0,324.0],"score":0.98,"text":"reviews six studies of passenger incidence behavior that are moti-"},{"category_id":15,"poly":[148.0,327.0,818.0,327.0,818.0,356.0,148.0,356.0],"score":1.0,"text":"vated by understanding the relationships between service headway,"},{"category_id":15,"poly":[146.0,355.0,820.0,355.0,820.0,388.0,146.0,388.0],"score":0.99,"text":"service reliability, passenger incidence behavior, and passenger"},{"category_id":15,"poly":[149.0,388.0,818.0,388.0,818.0,414.0,149.0,414.0],"score":1.0,"text":"waiting time in a more nuanced fashion than is embedded in the"},{"category_id":15,"poly":[149.0,419.0,818.0,419.0,818.0,445.0,149.0,445.0],"score":1.0,"text":"random incidence assumption (2). Three of these studies depend on"},{"category_id":15,"poly":[147.0,447.0,818.0,447.0,818.0,477.0,147.0,477.0],"score":0.99,"text":"manually collected data, two studies use data from AFC systems,"},{"category_id":15,"poly":[148.0,479.0,819.0,479.0,819.0,507.0,148.0,507.0],"score":0.99,"text":"and one study analyzes the issue purely theoretically. These studies"},{"category_id":15,"poly":[147.0,509.0,819.0,509.0,819.0,537.0,147.0,537.0],"score":0.99,"text":"reveal much about passenger incidence behavior, but all are found"},{"category_id":15,"poly":[147.0,538.0,820.0,538.0,820.0,567.0,147.0,567.0],"score":0.99,"text":"to be limited in their general applicability by the methods with"},{"category_id":15,"poly":[150.0,569.0,818.0,569.0,818.0,597.0,150.0,597.0],"score":0.99,"text":"which they collect information about passengers and the services"},{"category_id":15,"poly":[147.0,599.0,458.0,599.0,458.0,630.0,147.0,630.0],"score":1.0,"text":"those passengers intend to use."},{"category_id":15,"poly":[150.0,1219.0,212.0,1219.0,212.0,1247.0,150.0,1247.0],"score":1.0,"text":"where"},{"category_id":15,"poly":[264.0,1219.0,817.0,1219.0,817.0,1247.0,264.0,1247.0],"score":0.99,"text":"is the probabilistic expectation of some random variable"},{"category_id":15,"poly":[168.0,1248.0,209.0,1248.0,209.0,1275.0,168.0,1275.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[283.0,1248.0,601.0,1248.0,601.0,1275.0,283.0,1275.0],"score":0.97,"text":"is the coefficient of variation of"},{"category_id":15,"poly":[625.0,1248.0,818.0,1248.0,818.0,1275.0,625.0,1275.0],"score":0.96,"text":".a unitless measure"},{"category_id":15,"poly":[148.0,1277.0,345.0,1277.0,345.0,1307.0,148.0,1307.0],"score":0.97,"text":"of the variability of"},{"category_id":15,"poly":[370.0,1277.0,477.0,1277.0,477.0,1307.0,370.0,1307.0],"score":0.99,"text":"defined as"},{"category_id":15,"poly":[906.0,1883.0,1552.0,1883.0,1552.0,1910.0,906.0,1910.0],"score":0.98,"text":"Luethi et al. continued with the analysis of manually collected"},{"category_id":15,"poly":[880.0,1909.0,1552.0,1909.0,1552.0,1945.0,880.0,1945.0],"score":0.99,"text":"data on actual passenger behavior (6). They use the language"},{"category_id":15,"poly":[883.0,1945.0,1552.0,1945.0,1552.0,1972.0,883.0,1972.0],"score":0.99,"text":"of probability to describe two classes of passengers. The first is"},{"category_id":15,"poly":[881.0,1973.0,1552.0,1973.0,1552.0,2003.0,881.0,2003.0],"score":1.0,"text":"timetable-dependent passengers (i.e., the aware passengers), whose"},{"category_id":15,"poly":[881.0,2006.0,1552.0,2006.0,1552.0,2033.0,881.0,2033.0],"score":1.0,"text":"incidence behavior is affected by awareness (possibly gained"},{"category_id":15,"poly":[149.0,748.0,817.0,748.0,817.0,774.0,149.0,774.0],"score":1.0,"text":"One characterization of passenger incidence behavior is that of ran-"},{"category_id":15,"poly":[148.0,777.0,818.0,777.0,818.0,806.0,148.0,806.0],"score":0.99,"text":"dom incidence (3). The key assumption underlying the random inci-"},{"category_id":15,"poly":[148.0,807.0,818.0,807.0,818.0,836.0,148.0,836.0],"score":0.99,"text":"dence model is that the process of passenger arrivals to the public"},{"category_id":15,"poly":[148.0,837.0,819.0,837.0,819.0,866.0,148.0,866.0],"score":0.99,"text":"transport service is independent from the vehicle departure process"},{"category_id":15,"poly":[148.0,868.0,818.0,868.0,818.0,897.0,148.0,897.0],"score":1.0,"text":"of the service. This implies that passengers become incident to the"},{"category_id":15,"poly":[149.0,899.0,817.0,899.0,817.0,925.0,149.0,925.0],"score":0.99,"text":"service at a random time, and thus the instantaneous rate of passen-"},{"category_id":15,"poly":[148.0,928.0,820.0,928.0,820.0,957.0,148.0,957.0],"score":1.0,"text":"ger arrivals to the service is uniform over a given period of time. Let"},{"category_id":15,"poly":[174.0,956.0,214.0,956.0,214.0,990.0,174.0,990.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[239.0,956.0,818.0,956.0,818.0,990.0,239.0,990.0],"score":0.99,"text":"be random variables representing passenger waiting times"},{"category_id":15,"poly":[148.0,988.0,818.0,988.0,818.0,1016.0,148.0,1016.0],"score":1.0,"text":"and service headways, respectively. Under the random incidence"},{"category_id":15,"poly":[149.0,1019.0,818.0,1019.0,818.0,1048.0,149.0,1048.0],"score":0.98,"text":"assumption and the assumption that vehicle capacity is not a binding"},{"category_id":15,"poly":[149.0,1050.0,726.0,1050.0,726.0,1076.0,149.0,1076.0],"score":0.99,"text":"constraint, a classic result of transportation science is that"},{"category_id":15,"poly":[146.0,1793.0,818.0,1793.0,818.0,1822.0,146.0,1822.0],"score":0.98,"text":" Jolliffe and Hutchinson studied bus passenger incidence in South"},{"category_id":15,"poly":[147.0,1825.0,696.0,1825.0,696.0,1852.0,147.0,1852.0],"score":0.97,"text":"London suburbs (5). They observed 10 bus stops for"},{"category_id":15,"poly":[735.0,1825.0,817.0,1825.0,817.0,1852.0,735.0,1852.0],"score":1.0,"text":"perday"},{"category_id":15,"poly":[148.0,1855.0,819.0,1855.0,819.0,1881.0,148.0,1881.0],"score":1.0,"text":"over 8 days, recording the times of passenger incidence and actual"},{"category_id":15,"poly":[148.0,1884.0,819.0,1884.0,819.0,1912.0,148.0,1912.0],"score":0.98,"text":"and scheduled bus departures. They limited their stop selection to"},{"category_id":15,"poly":[146.0,1913.0,819.0,1913.0,819.0,1945.0,146.0,1945.0],"score":1.0,"text":"those served by only a single bus route with a single service pat-"},{"category_id":15,"poly":[147.0,1945.0,819.0,1945.0,819.0,1974.0,147.0,1974.0],"score":0.98,"text":"tern so as to avoid ambiguity about which service a passenger was"},{"category_id":15,"poly":[147.0,1972.0,820.0,1972.0,820.0,2006.0,147.0,2006.0],"score":0.98,"text":"waiting for. The authors found that the actual average passenger"},{"category_id":15,"poly":[149.0,2005.0,323.0,2005.0,323.0,2033.0,149.0,2033.0],"score":0.96,"text":"waitingtimewas"},{"category_id":15,"poly":[374.0,2005.0,819.0,2005.0,819.0,2033.0,374.0,2033.0],"score":1.0,"text":"less than predicted by the random incidence"},{"category_id":15,"poly":[148.0,686.0,625.0,686.0,625.0,721.0,148.0,721.0],"score":0.99,"text":"Random Passenger Incidence Behavior"},{"category_id":15,"poly":[151.0,1434.0,213.0,1434.0,213.0,1462.0,151.0,1462.0],"score":0.99,"text":"where"},{"category_id":15,"poly":[246.0,1434.0,521.0,1434.0,521.0,1462.0,246.0,1462.0],"score":0.98,"text":"is the standard deviation of"},{"category_id":15,"poly":[580.0,1434.0,816.0,1434.0,816.0,1462.0,580.0,1462.0],"score":0.96,"text":".The second expression"},{"category_id":15,"poly":[148.0,1466.0,819.0,1466.0,819.0,1493.0,148.0,1493.0],"score":0.99,"text":"in Equation 1 is particularly useful because it expresses the mean"},{"category_id":15,"poly":[146.0,1496.0,819.0,1496.0,819.0,1525.0,146.0,1525.0],"score":0.99,"text":"passenger waiting time as the sum of two components: the waiting"},{"category_id":15,"poly":[148.0,1526.0,818.0,1526.0,818.0,1553.0,148.0,1553.0],"score":0.98,"text":"time caused by the mean headway (i.e., the reciprocal of service fre-"},{"category_id":15,"poly":[147.0,1557.0,819.0,1557.0,819.0,1584.0,147.0,1584.0],"score":0.99,"text":"quency) and the waiting time caused by the variability of the head-"},{"category_id":15,"poly":[148.0,1588.0,818.0,1588.0,818.0,1612.0,148.0,1612.0],"score":0.97,"text":"ways (which is one measure of service reliability). When the service"},{"category_id":15,"poly":[148.0,1617.0,817.0,1617.0,817.0,1644.0,148.0,1644.0],"score":1.0,"text":"is perfectly reliable with constant headways, the mean waiting time"},{"category_id":15,"poly":[148.0,1646.0,472.0,1646.0,472.0,1677.0,148.0,1677.0],"score":0.99,"text":"will be simply half the headway."},{"category_id":15,"poly":[151.0,176.0,817.0,176.0,817.0,204.0,151.0,204.0],"score":0.99,"text":"dependent on the service headway and the reliability of the departure"},{"category_id":15,"poly":[147.0,205.0,652.0,205.0,652.0,236.0,147.0,236.0],"score":0.99,"text":"time of the service to which passengers are incident."},{"category_id":15,"poly":[149.0,1735.0,702.0,1735.0,702.0,1767.0,149.0,1767.0],"score":0.98,"text":"More Behaviorally Realistic Incidence Models"},{"category_id":15,"poly":[1519.0,98.0,1554.0,98.0,1554.0,125.0,1519.0,125.0],"score":1.0,"text":"53"},{"category_id":15,"poly":[148.0,98.0,322.0,98.0,322.0,123.0,148.0,123.0],"score":1.0,"text":"Frumin and Zhao"}],"page_info":{"page_no":0,"height":2200,"width":1700}}]}
--- a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json
+++ b/tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json
-[{"layout_dets":[{"category_id":1,"poly":[882.4013061523438,169.93817138671875,1552.350341796875,169.93817138671875,1552.350341796875,625.8263549804688,882.4013061523438,625.8263549804688],"score":0.999992311000824},{"category_id":1,"poly":[882.474853515625,1450.92822265625,1551.4490966796875,1450.92822265625,1551.4490966796875,1877.5712890625,882.474853515625,1877.5712890625],"score":0.9999903440475464},{"category_id":1,"poly":[881.6513061523438,626.2058715820312,1552.1400146484375,626.2058715820312,1552.1400146484375,1450.604736328125,881.6513061523438,1450.604736328125],"score":0.9999856352806091},{"category_id":1,"poly":[149.41075134277344,232.1595001220703,819.0465087890625,232.1595001220703,819.0465087890625,625.8865356445312,149.41075134277344,625.8865356445312],"score":0.99998539686203},{"category_id":1,"poly":[149.3945770263672,1215.5172119140625,817.8850708007812,1215.5172119140625,817.8850708007812,1304.873291015625,149.3945770263672,1304.873291015625],"score":0.9999765157699585},{"category_id":1,"poly":[882.6979370117188,1880.13916015625,1552.15185546875,1880.13916015625,1552.15185546875,2031.339599609375,882.6979370117188,2031.339599609375],"score":0.9999744892120361},{"category_id":1,"poly":[148.96054077148438,743.3055419921875,818.6231689453125,743.3055419921875,818.6231689453125,1074.2369384765625,148.96054077148438,1074.2369384765625],"score":0.9999669790267944},{"category_id":1,"poly":[148.8435516357422,1791.14306640625,818.6885375976562,1791.14306640625,818.6885375976562,2030.794189453125,148.8435516357422,2030.794189453125],"score":0.9999618530273438},{"category_id":0,"poly":[150.7009735107422,684.0087890625,623.5106201171875,684.0087890625,623.5106201171875,717.03662109375,150.7009735107422,717.03662109375],"score":0.9999415278434753},{"category_id":8,"poly":[146.48068237304688,1331.6737060546875,317.2640075683594,1331.6737060546875,317.2640075683594,1400.1722412109375,146.48068237304688,1400.1722412109375],"score":0.9998958110809326},{"category_id":1,"poly":[149.42420959472656,1430.8782958984375,818.9042358398438,1430.8782958984375,818.9042358398438,1672.7386474609375,149.42420959472656,1672.7386474609375],"score":0.9998599290847778},{"category_id":1,"poly":[149.18746948242188,172.10252380371094,818.5662231445312,172.10252380371094,818.5662231445312,230.4594268798828,149.18746948242188,230.4594268798828],"score":0.9997718334197998},{"category_id":0,"poly":[149.0175018310547,1732.1090087890625,702.1005859375,1732.1090087890625,702.1005859375,1763.6046142578125,149.0175018310547,1763.6046142578125],"score":0.9997085928916931},{"category_id":2,"poly":[1519.802490234375,98.59099578857422,1551.985107421875,98.59099578857422,1551.985107421875,119.48420715332031,1519.802490234375,119.48420715332031],"score":0.9995552897453308},{"category_id":8,"poly":[146.9109649658203,1100.156494140625,544.2803344726562,1100.156494140625,544.2803344726562,1184.929443359375,146.9109649658203,1184.929443359375],"score":0.9995207786560059},{"category_id":2,"poly":[148.11611938476562,99.87767791748047,318.926025390625,99.87767791748047,318.926025390625,120.70393371582031,148.11611938476562,120.70393371582031],"score":0.999351441860199},{"category_id":9,"poly":[791.7642211914062,1130.056396484375,818.6940307617188,1130.056396484375,818.6940307617188,1161.1080322265625,791.7642211914062,1161.1080322265625],"score":0.9908884763717651},{"category_id":9,"poly":[788.37060546875,1346.8450927734375,818.5010986328125,1346.8450927734375,818.5010986328125,1377.370361328125,788.37060546875,1377.370361328125],"score":0.9873985052108765},{"category_id":14,"poly":[146,1103,543,1103,543,1184,146,1184],"score":0.94,"latex":"E\\!\\left(W\\right)\\!=\\!\\frac{E\\!\\left[H^{2}\\right]}{2E\\!\\left[H\\right]}\\!=\\!\\frac{E\\!\\left[H\\right]}{2}\\!\\!\\left(1\\!+\\!\\operatorname{CV}\\!\\left(H\\right)^{2}\\right)"},{"category_id":13,"poly":[1196,354,1278,354,1278,384,1196,384],"score":0.91,"latex":"p(1-q)"},{"category_id":13,"poly":[881,415,1020,415,1020,444,881,444],"score":0.91,"latex":"(1-p)(1-q)"},{"category_id":14,"poly":[147,1333,318,1333,318,1400,147,1400],"score":0.91,"latex":"\\mathrm{CV}\\big(H\\big)\\!=\\!\\frac{\\sigma_{_H}}{E\\big[H\\big]}"},{"category_id":13,"poly":[1197,657,1263,657,1263,686,1197,686],"score":0.9,"latex":"(1-p)"},{"category_id":13,"poly":[213,1217,263,1217,263,1244,213,1244],"score":0.88,"latex":"E[X]"},{"category_id":13,"poly":[214,1434,245,1434,245,1459,214,1459],"score":0.87,"latex":"\\upsigma_{H}"},{"category_id":13,"poly":[324,2002,373,2002,373,2028,324,2028],"score":0.84,"latex":"30\\%"},{"category_id":13,"poly":[1209,693,1225,693,1225,717,1209,717],"score":0.83,"latex":"p"},{"category_id":13,"poly":[990,449,1007,449,1007,474,990,474],"score":0.81,"latex":"p"},{"category_id":13,"poly":[346,1277,369,1277,369,1301,346,1301],"score":0.81,"latex":"H"},{"category_id":13,"poly":[1137,661,1154,661,1154,686,1137,686],"score":0.81,"latex":"p"},{"category_id":13,"poly":[522,1432,579,1432,579,1459,522,1459],"score":0.81,"latex":"H\\left(4\\right)"},{"category_id":13,"poly":[944,540,962,540,962,565,944,565],"score":0.8,"latex":"p"},{"category_id":13,"poly":[1444,936,1461,936,1461,961,1444,961],"score":0.79,"latex":"p"},{"category_id":13,"poly":[602,1247,624,1247,624,1270,602,1270],"score":0.78,"latex":"H"},{"category_id":13,"poly":[147,1247,167,1247,167,1271,147,1271],"score":0.77,"latex":"X"},{"category_id":13,"poly":[210,1246,282,1246,282,1274,210,1274],"score":0.77,"latex":"\\mathrm{CV}(H)"},{"category_id":13,"poly":[1346,268,1361,268,1361,292,1346,292],"score":0.76,"latex":"q"},{"category_id":13,"poly":[215,957,238,957,238,981,215,981],"score":0.74,"latex":"H"},{"category_id":13,"poly":[149,956,173,956,173,981,149,981],"score":0.63,"latex":"W"},{"category_id":13,"poly":[924,841,1016,841,1016,868,924,868],"score":0.56,"latex":"8{\\mathrm{:}}00\\;\\mathrm{a.m}"},{"category_id":13,"poly":[956,871,1032,871,1032,898,956,898],"score":0.43,"latex":"20\\ \\mathrm{min}"},{"category_id":13,"poly":[1082,781,1112,781,1112,808,1082,808],"score":0.41,"latex":"(I)"},{"category_id":13,"poly":[697,1821,734,1821,734,1847,697,1847],"score":0.3,"latex":"1\\,\\mathrm{~h~}"},{"category_id":15,"poly":[881.0,174.0,1552.0,174.0,1552.0,204.0,881.0,204.0],"score":1.0,"text":"model. They also found that the empirical distributions of passenger"},{"category_id":15,"poly":[880.0,205.0,1552.0,205.0,1552.0,236.0,880.0,236.0],"score":0.99,"text":"incidence times (by time of day) had peaks just before the respec-"},{"category_id":15,"poly":[880.0,234.0,1553.0,234.0,1553.0,264.0,880.0,264.0],"score":0.99,"text":"tive average bus departure times. They hypothesized the existence"},{"category_id":15,"poly":[881.0,264.0,1345.0,264.0,1345.0,296.0,881.0,296.0],"score":0.98,"text":"of three classes of passengers: with proportion"},{"category_id":15,"poly":[1362.0,264.0,1552.0,264.0,1552.0,296.0,1362.0,296.0],"score":0.95,"text":"passengers whose"},{"category_id":15,"poly":[880.0,295.0,1552.0,295.0,1552.0,325.0,880.0,325.0],"score":1.0,"text":"time of incidence is causally coincident with that of a bus departure"},{"category_id":15,"poly":[880.0,326.0,1555.0,326.0,1555.0,355.0,880.0,355.0],"score":0.99,"text":"(e.g., because they saw the approaching bus from their home or a"},{"category_id":15,"poly":[881.0,356.0,1195.0,356.0,1195.0,388.0,881.0,388.0],"score":0.99,"text":"shop window); with proportion"},{"category_id":15,"poly":[1279.0,356.0,1553.0,356.0,1553.0,388.0,1279.0,388.0],"score":0.99,"text":", passengers who time their"},{"category_id":15,"poly":[882.0,388.0,1552.0,388.0,1552.0,416.0,882.0,416.0],"score":0.99,"text":"arrivals to minimize expected waiting time; and with proportion"},{"category_id":15,"poly":[1021.0,418.0,1553.0,418.0,1553.0,447.0,1021.0,447.0],"score":1.0,"text":", passengers who are randomly incident. The authors"},{"category_id":15,"poly":[881.0,448.0,989.0,448.0,989.0,477.0,881.0,477.0],"score":1.0,"text":"found that"},{"category_id":15,"poly":[1008.0,448.0,1553.0,448.0,1553.0,477.0,1008.0,477.0],"score":1.0,"text":"was positively correlated with the potential reduction"},{"category_id":15,"poly":[880.0,479.0,1552.0,479.0,1552.0,507.0,880.0,507.0],"score":1.0,"text":"in waiting time (compared with arriving randomly) that resulted"},{"category_id":15,"poly":[882.0,510.0,1551.0,510.0,1551.0,536.0,882.0,536.0],"score":0.97,"text":"from knowledge of the timetable and of service reliability. They also"},{"category_id":15,"poly":[881.0,539.0,943.0,539.0,943.0,568.0,881.0,568.0],"score":1.0,"text":"found"},{"category_id":15,"poly":[963.0,539.0,1553.0,539.0,1553.0,568.0,963.0,568.0],"score":0.99,"text":"to be higher in the peak commuting periods rather than in"},{"category_id":15,"poly":[881.0,568.0,1554.0,568.0,1554.0,599.0,881.0,599.0],"score":0.98,"text":"the off-peak periods, indicating more awareness of the timetable or"},{"category_id":15,"poly":[881.0,599.0,1323.0,599.0,1323.0,627.0,881.0,627.0],"score":0.98,"text":"historical reliability, or both, by commuters."},{"category_id":15,"poly":[905.0,1452.0,1551.0,1452.0,1551.0,1483.0,905.0,1483.0],"score":0.99,"text":"Furth and Muller study the issue in a theoretical context and gener-"},{"category_id":15,"poly":[883.0,1485.0,1553.0,1485.0,1553.0,1514.0,883.0,1514.0],"score":1.0,"text":"ally agree with the above findings (2). They are primarily concerned"},{"category_id":15,"poly":[882.0,1513.0,1553.0,1513.0,1553.0,1545.0,882.0,1545.0],"score":0.99,"text":"with the use of data from automatic vehicle-tracking systems to assess"},{"category_id":15,"poly":[880.0,1545.0,1553.0,1545.0,1553.0,1574.0,880.0,1574.0],"score":0.99,"text":"the impacts of reliability on passenger incidence behavior and wait-"},{"category_id":15,"poly":[881.0,1577.0,1551.0,1577.0,1551.0,1606.0,881.0,1606.0],"score":0.98,"text":"ing times. They propose that passengers will react to unreliability by"},{"category_id":15,"poly":[883.0,1608.0,1551.0,1608.0,1551.0,1637.0,883.0,1637.0],"score":1.0,"text":"departing earlier than they would with reliable services. Randomly"},{"category_id":15,"poly":[880.0,1636.0,1554.0,1636.0,1554.0,1669.0,880.0,1669.0],"score":1.0,"text":"incident unaware passengers will experience unreliability as a more"},{"category_id":15,"poly":[882.0,1669.0,1553.0,1669.0,1553.0,1697.0,882.0,1697.0],"score":0.99,"text":"dispersed distribution of headways and simply allocate additional"},{"category_id":15,"poly":[880.0,1699.0,1551.0,1699.0,1551.0,1726.0,880.0,1726.0],"score":0.97,"text":"time to their trip plan to improve the chance of arriving at their des-"},{"category_id":15,"poly":[881.0,1730.0,1551.0,1730.0,1551.0,1759.0,881.0,1759.0],"score":0.98,"text":"tination on time. Aware passengers, whose incidence is not entirely"},{"category_id":15,"poly":[880.0,1760.0,1552.0,1760.0,1552.0,1789.0,880.0,1789.0],"score":0.99,"text":"random, will react by timing their incidence somewhat earlier than"},{"category_id":15,"poly":[882.0,1792.0,1550.0,1792.0,1550.0,1818.0,882.0,1818.0],"score":0.99,"text":"the scheduled departure time to increase their chance of catching the"},{"category_id":15,"poly":[883.0,1823.0,1552.0,1823.0,1552.0,1849.0,883.0,1849.0],"score":0.99,"text":"desired service. The authors characterize these reactions as the costs"},{"category_id":15,"poly":[883.0,1853.0,1031.0,1853.0,1031.0,1880.0,883.0,1880.0],"score":0.95,"text":"of unreliability."},{"category_id":15,"poly":[907.0,630.0,1553.0,630.0,1553.0,658.0,907.0,658.0],"score":1.0,"text":"Bowman and Turnquist built on the concept of aware and unaware"},{"category_id":15,"poly":[881.0,662.0,1136.0,662.0,1136.0,690.0,881.0,690.0],"score":0.99,"text":"passengers of proportions"},{"category_id":15,"poly":[1155.0,662.0,1196.0,662.0,1196.0,690.0,1155.0,690.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1264.0,662.0,1553.0,662.0,1553.0,690.0,1264.0,690.0],"score":0.99,"text":",respectively. They proposed"},{"category_id":15,"poly":[881.0,692.0,1208.0,692.0,1208.0,719.0,881.0,719.0],"score":0.99,"text":"a utility-based model to estimate"},{"category_id":15,"poly":[1226.0,692.0,1552.0,692.0,1552.0,719.0,1226.0,719.0],"score":1.0,"text":"and the distribution of incidence"},{"category_id":15,"poly":[880.0,721.0,1554.0,721.0,1554.0,751.0,880.0,751.0],"score":0.99,"text":"times, and thus the mean waiting time, of aware passengers over"},{"category_id":15,"poly":[880.0,752.0,1553.0,752.0,1553.0,780.0,880.0,780.0],"score":0.98,"text":"a given headway as a function of the headway and reliability of"},{"category_id":15,"poly":[880.0,782.0,1081.0,782.0,1081.0,812.0,880.0,812.0],"score":0.99,"text":"bus departure times"},{"category_id":15,"poly":[1113.0,782.0,1552.0,782.0,1552.0,812.0,1113.0,812.0],"score":0.99,"text":". They observed seven bus stops in Chicago,"},{"category_id":15,"poly":[882.0,813.0,1553.0,813.0,1553.0,841.0,882.0,841.0],"score":0.98,"text":"Illinois, each served by a single (different) bus route, between 6:00"},{"category_id":15,"poly":[882.0,844.0,923.0,844.0,923.0,871.0,882.0,871.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[1017.0,844.0,1550.0,844.0,1550.0,871.0,1017.0,871.0],"score":0.97,"text":".for 5 to 10 days each. The bus routes had headways"},{"category_id":15,"poly":[882.0,874.0,955.0,874.0,955.0,902.0,882.0,902.0],"score":0.95,"text":"of 5to"},{"category_id":15,"poly":[1033.0,874.0,1553.0,874.0,1553.0,902.0,1033.0,902.0],"score":0.98,"text":"and a range of reliabilities. The authors found that"},{"category_id":15,"poly":[882.0,906.0,1553.0,906.0,1553.0,933.0,882.0,933.0],"score":0.99,"text":"actual average waiting time was substantially less than predicted"},{"category_id":15,"poly":[881.0,935.0,1443.0,935.0,1443.0,963.0,881.0,963.0],"score":1.0,"text":"by the random incidence model. They estimated that"},{"category_id":15,"poly":[1462.0,935.0,1553.0,935.0,1553.0,963.0,1462.0,963.0],"score":0.96,"text":"was not"},{"category_id":15,"poly":[881.0,966.0,1552.0,966.0,1552.0,994.0,881.0,994.0],"score":0.98,"text":"statistically significantly different from 1.0, which they explain by"},{"category_id":15,"poly":[880.0,994.0,1552.0,994.0,1552.0,1025.0,880.0,1025.0],"score":0.99,"text":"the fact that all observations were taken during peak commuting"},{"category_id":15,"poly":[880.0,1027.0,1552.0,1027.0,1552.0,1054.0,880.0,1054.0],"score":0.99,"text":"times. Their model predicts that the longer the headway and the"},{"category_id":15,"poly":[881.0,1058.0,1554.0,1058.0,1554.0,1086.0,881.0,1086.0],"score":0.99,"text":"more reliable the departures, the more peaked the distribution of"},{"category_id":15,"poly":[881.0,1088.0,1553.0,1088.0,1553.0,1115.0,881.0,1115.0],"score":0.98,"text":"incidence times will be and the closer that peak will be to the next"},{"category_id":15,"poly":[882.0,1119.0,1552.0,1119.0,1552.0,1148.0,882.0,1148.0],"score":1.0,"text":"scheduled departure time. This prediction demonstrates what they"},{"category_id":15,"poly":[882.0,1149.0,1552.0,1149.0,1552.0,1176.0,882.0,1176.0],"score":0.99,"text":"refer to as a safety margin that passengers add to reduce the chance"},{"category_id":15,"poly":[883.0,1181.0,1552.0,1181.0,1552.0,1206.0,883.0,1206.0],"score":0.98,"text":"of missing their bus when the service is known to be somewhat"},{"category_id":15,"poly":[882.0,1210.0,1551.0,1210.0,1551.0,1238.0,882.0,1238.0],"score":0.98,"text":"unreliable. Such a safety margin can also result from unreliability in"},{"category_id":15,"poly":[881.0,1242.0,1553.0,1242.0,1553.0,1269.0,881.0,1269.0],"score":0.99,"text":"passengers' journeys to the public transport stop or station. Bowman"},{"category_id":15,"poly":[882.0,1271.0,1553.0,1271.0,1553.0,1299.0,882.0,1299.0],"score":0.99,"text":"and Turnquist conclude from their model that the random incidence"},{"category_id":15,"poly":[880.0,1301.0,1551.0,1301.0,1551.0,1331.0,880.0,1331.0],"score":0.99,"text":"model underestimates the waiting time benefits of improving reli-"},{"category_id":15,"poly":[882.0,1332.0,1552.0,1332.0,1552.0,1362.0,882.0,1362.0],"score":0.99,"text":"ability and overestimates the waiting time benefits of increasing ser-"},{"category_id":15,"poly":[883.0,1363.0,1552.0,1363.0,1552.0,1392.0,883.0,1392.0],"score":0.99,"text":"vice frequency. This is because as reliability increases passengers"},{"category_id":15,"poly":[882.0,1394.0,1552.0,1394.0,1552.0,1422.0,882.0,1422.0],"score":0.99,"text":"can better predict departure times and so can time their incidence to"},{"category_id":15,"poly":[882.0,1423.0,1159.0,1423.0,1159.0,1452.0,882.0,1452.0],"score":0.99,"text":"decrease their waiting time."},{"category_id":15,"poly":[175.0,235.0,819.0,235.0,819.0,264.0,175.0,264.0],"score":0.99,"text":"After briefly introducing the random incidence model, which is"},{"category_id":15,"poly":[149.0,265.0,818.0,265.0,818.0,295.0,149.0,295.0],"score":0.98,"text":"often assumed to hold at short headways, the balance of this section"},{"category_id":15,"poly":[148.0,298.0,818.0,298.0,818.0,324.0,148.0,324.0],"score":0.98,"text":"reviews six studies of passenger incidence behavior that are moti-"},{"category_id":15,"poly":[148.0,327.0,818.0,327.0,818.0,356.0,148.0,356.0],"score":1.0,"text":"vated by understanding the relationships between service headway,"},{"category_id":15,"poly":[146.0,355.0,820.0,355.0,820.0,388.0,146.0,388.0],"score":0.99,"text":"service reliability, passenger incidence behavior, and passenger"},{"category_id":15,"poly":[149.0,388.0,818.0,388.0,818.0,414.0,149.0,414.0],"score":1.0,"text":"waiting time in a more nuanced fashion than is embedded in the"},{"category_id":15,"poly":[149.0,419.0,818.0,419.0,818.0,445.0,149.0,445.0],"score":1.0,"text":"random incidence assumption (2). Three of these studies depend on"},{"category_id":15,"poly":[147.0,447.0,818.0,447.0,818.0,477.0,147.0,477.0],"score":0.99,"text":"manually collected data, two studies use data from AFC systems,"},{"category_id":15,"poly":[148.0,479.0,819.0,479.0,819.0,507.0,148.0,507.0],"score":0.99,"text":"and one study analyzes the issue purely theoretically. These studies"},{"category_id":15,"poly":[147.0,509.0,819.0,509.0,819.0,537.0,147.0,537.0],"score":0.99,"text":"reveal much about passenger incidence behavior, but all are found"},{"category_id":15,"poly":[147.0,538.0,820.0,538.0,820.0,567.0,147.0,567.0],"score":0.99,"text":"to be limited in their general applicability by the methods with"},{"category_id":15,"poly":[150.0,569.0,818.0,569.0,818.0,597.0,150.0,597.0],"score":0.99,"text":"which they collect information about passengers and the services"},{"category_id":15,"poly":[147.0,599.0,458.0,599.0,458.0,630.0,147.0,630.0],"score":1.0,"text":"those passengers intend to use."},{"category_id":15,"poly":[150.0,1219.0,212.0,1219.0,212.0,1247.0,150.0,1247.0],"score":1.0,"text":"where"},{"category_id":15,"poly":[264.0,1219.0,817.0,1219.0,817.0,1247.0,264.0,1247.0],"score":0.99,"text":"is the probabilistic expectation of some random variable"},{"category_id":15,"poly":[168.0,1248.0,209.0,1248.0,209.0,1275.0,168.0,1275.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[283.0,1248.0,601.0,1248.0,601.0,1275.0,283.0,1275.0],"score":0.97,"text":"is the coefficient of variation of"},{"category_id":15,"poly":[625.0,1248.0,818.0,1248.0,818.0,1275.0,625.0,1275.0],"score":0.96,"text":".a unitless measure"},{"category_id":15,"poly":[148.0,1277.0,345.0,1277.0,345.0,1307.0,148.0,1307.0],"score":0.97,"text":"of the variability of"},{"category_id":15,"poly":[370.0,1277.0,477.0,1277.0,477.0,1307.0,370.0,1307.0],"score":0.99,"text":"defined as"},{"category_id":15,"poly":[906.0,1883.0,1552.0,1883.0,1552.0,1910.0,906.0,1910.0],"score":0.98,"text":"Luethi et al. continued with the analysis of manually collected"},{"category_id":15,"poly":[880.0,1909.0,1552.0,1909.0,1552.0,1945.0,880.0,1945.0],"score":0.99,"text":"data on actual passenger behavior (6). They use the language"},{"category_id":15,"poly":[883.0,1945.0,1552.0,1945.0,1552.0,1972.0,883.0,1972.0],"score":0.99,"text":"of probability to describe two classes of passengers. The first is"},{"category_id":15,"poly":[881.0,1973.0,1552.0,1973.0,1552.0,2003.0,881.0,2003.0],"score":1.0,"text":"timetable-dependent passengers (i.e., the aware passengers), whose"},{"category_id":15,"poly":[881.0,2006.0,1552.0,2006.0,1552.0,2033.0,881.0,2033.0],"score":1.0,"text":"incidence behavior is affected by awareness (possibly gained"},{"category_id":15,"poly":[149.0,748.0,817.0,748.0,817.0,774.0,149.0,774.0],"score":1.0,"text":"One characterization of passenger incidence behavior is that of ran-"},{"category_id":15,"poly":[148.0,777.0,818.0,777.0,818.0,806.0,148.0,806.0],"score":0.99,"text":"dom incidence (3). The key assumption underlying the random inci-"},{"category_id":15,"poly":[148.0,807.0,818.0,807.0,818.0,836.0,148.0,836.0],"score":0.99,"text":"dence model is that the process of passenger arrivals to the public"},{"category_id":15,"poly":[148.0,837.0,819.0,837.0,819.0,866.0,148.0,866.0],"score":0.99,"text":"transport service is independent from the vehicle departure process"},{"category_id":15,"poly":[148.0,868.0,818.0,868.0,818.0,897.0,148.0,897.0],"score":1.0,"text":"of the service. This implies that passengers become incident to the"},{"category_id":15,"poly":[149.0,899.0,817.0,899.0,817.0,925.0,149.0,925.0],"score":0.99,"text":"service at a random time, and thus the instantaneous rate of passen-"},{"category_id":15,"poly":[148.0,928.0,820.0,928.0,820.0,957.0,148.0,957.0],"score":1.0,"text":"ger arrivals to the service is uniform over a given period of time. Let"},{"category_id":15,"poly":[174.0,956.0,214.0,956.0,214.0,990.0,174.0,990.0],"score":1.0,"text":"and"},{"category_id":15,"poly":[239.0,956.0,818.0,956.0,818.0,990.0,239.0,990.0],"score":0.99,"text":"be random variables representing passenger waiting times"},{"category_id":15,"poly":[148.0,988.0,818.0,988.0,818.0,1016.0,148.0,1016.0],"score":1.0,"text":"and service headways, respectively. Under the random incidence"},{"category_id":15,"poly":[149.0,1019.0,818.0,1019.0,818.0,1048.0,149.0,1048.0],"score":0.98,"text":"assumption and the assumption that vehicle capacity is not a binding"},{"category_id":15,"poly":[149.0,1050.0,726.0,1050.0,726.0,1076.0,149.0,1076.0],"score":0.99,"text":"constraint, a classic result of transportation science is that"},{"category_id":15,"poly":[146.0,1793.0,818.0,1793.0,818.0,1822.0,146.0,1822.0],"score":0.98,"text":" Jolliffe and Hutchinson studied bus passenger incidence in South"},{"category_id":15,"poly":[147.0,1825.0,696.0,1825.0,696.0,1852.0,147.0,1852.0],"score":0.97,"text":"London suburbs (5). They observed 10 bus stops for"},{"category_id":15,"poly":[735.0,1825.0,817.0,1825.0,817.0,1852.0,735.0,1852.0],"score":1.0,"text":"perday"},{"category_id":15,"poly":[148.0,1855.0,819.0,1855.0,819.0,1881.0,148.0,1881.0],"score":1.0,"text":"over 8 days, recording the times of passenger incidence and actual"},{"category_id":15,"poly":[148.0,1884.0,819.0,1884.0,819.0,1912.0,148.0,1912.0],"score":0.98,"text":"and scheduled bus departures. They limited their stop selection to"},{"category_id":15,"poly":[146.0,1913.0,819.0,1913.0,819.0,1945.0,146.0,1945.0],"score":1.0,"text":"those served by only a single bus route with a single service pat-"},{"category_id":15,"poly":[147.0,1945.0,819.0,1945.0,819.0,1974.0,147.0,1974.0],"score":0.98,"text":"tern so as to avoid ambiguity about which service a passenger was"},{"category_id":15,"poly":[147.0,1972.0,820.0,1972.0,820.0,2006.0,147.0,2006.0],"score":0.98,"text":"waiting for. The authors found that the actual average passenger"},{"category_id":15,"poly":[149.0,2005.0,323.0,2005.0,323.0,2033.0,149.0,2033.0],"score":0.96,"text":"waitingtimewas"},{"category_id":15,"poly":[374.0,2005.0,819.0,2005.0,819.0,2033.0,374.0,2033.0],"score":1.0,"text":"less than predicted by the random incidence"},{"category_id":15,"poly":[148.0,686.0,625.0,686.0,625.0,721.0,148.0,721.0],"score":0.99,"text":"Random Passenger Incidence Behavior"},{"category_id":15,"poly":[151.0,1434.0,213.0,1434.0,213.0,1462.0,151.0,1462.0],"score":0.99,"text":"where"},{"category_id":15,"poly":[246.0,1434.0,521.0,1434.0,521.0,1462.0,246.0,1462.0],"score":0.98,"text":"is the standard deviation of"},{"category_id":15,"poly":[580.0,1434.0,816.0,1434.0,816.0,1462.0,580.0,1462.0],"score":0.96,"text":".The second expression"},{"category_id":15,"poly":[148.0,1466.0,819.0,1466.0,819.0,1493.0,148.0,1493.0],"score":0.99,"text":"in Equation 1 is particularly useful because it expresses the mean"},{"category_id":15,"poly":[146.0,1496.0,819.0,1496.0,819.0,1525.0,146.0,1525.0],"score":0.99,"text":"passenger waiting time as the sum of two components: the waiting"},{"category_id":15,"poly":[148.0,1526.0,818.0,1526.0,818.0,1553.0,148.0,1553.0],"score":0.98,"text":"time caused by the mean headway (i.e., the reciprocal of service fre-"},{"category_id":15,"poly":[147.0,1557.0,819.0,1557.0,819.0,1584.0,147.0,1584.0],"score":0.99,"text":"quency) and the waiting time caused by the variability of the head-"},{"category_id":15,"poly":[148.0,1588.0,818.0,1588.0,818.0,1612.0,148.0,1612.0],"score":0.97,"text":"ways (which is one measure of service reliability). When the service"},{"category_id":15,"poly":[148.0,1617.0,817.0,1617.0,817.0,1644.0,148.0,1644.0],"score":1.0,"text":"is perfectly reliable with constant headways, the mean waiting time"},{"category_id":15,"poly":[148.0,1646.0,472.0,1646.0,472.0,1677.0,148.0,1677.0],"score":0.99,"text":"will be simply half the headway."},{"category_id":15,"poly":[151.0,176.0,817.0,176.0,817.0,204.0,151.0,204.0],"score":0.99,"text":"dependent on the service headway and the reliability of the departure"},{"category_id":15,"poly":[147.0,205.0,652.0,205.0,652.0,236.0,147.0,236.0],"score":0.99,"text":"time of the service to which passengers are incident."},{"category_id":15,"poly":[149.0,1735.0,702.0,1735.0,702.0,1767.0,149.0,1767.0],"score":0.98,"text":"More Behaviorally Realistic Incidence Models"},{"category_id":15,"poly":[1519.0,98.0,1554.0,98.0,1554.0,125.0,1519.0,125.0],"score":1.0,"text":"53"},{"category_id":15,"poly":[148.0,98.0,322.0,98.0,322.0,123.0,148.0,123.0],"score":1.0,"text":"Frumin and Zhao"}],"page_info":{"page_no":0,"height":2200,"width":1700}}]
\ No newline at end of file
--- a/tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf
+++ b/tests/unittest/test_tools/assets/cli_dev/cli_test_01.pdf
--- a/tests/unittest/test_tools/assets/common/cli_test_01.pdf
+++ b/tests/unittest/test_tools/assets/common/cli_test_01.pdf
--- a/tests/unittest/test_tools/test_cli.py
+++ b/tests/unittest/test_tools/test_cli.py
-import os
-import shutil
-import tempfile
-
-from click.testing import CliRunner
-
-from magic_pdf.tools.cli import cli
-
-
-def test_cli_pdf():
-    # setup
-    unitest_dir = '/tmp/magic_pdf/unittest/tools'
-    filename = 'cli_test_01'
-    os.makedirs(unitest_dir, exist_ok=True)
-    temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
-
-    # run
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            '-p',
-            'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf',
-            '-o',
-            temp_output_dir,
-        ],
-    )
-
-    # check
-    assert result.exit_code == 0
-
-    base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
-    assert r.st_size > 7000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
-    assert r.st_size > 200000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
-    assert r.st_size > 15000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
-    assert r.st_size > 400000
-
-    assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True
-
-    # teardown
-    shutil.rmtree(temp_output_dir)
-
-
-def test_cli_path():
-    # setup
-    unitest_dir = '/tmp/magic_pdf/unittest/tools'
-    os.makedirs(unitest_dir, exist_ok=True)
-    temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
-
-    # run
-    runner = CliRunner()
-    result = runner.invoke(
-        cli, ['-p', 'tests/unittest/test_tools/assets/cli/path', '-o', temp_output_dir]
-    )
-
-    # check
-    assert result.exit_code == 0
-
-    filename = 'cli_test_01'
-    base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
-    assert r.st_size > 7000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
-    assert r.st_size > 200000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
-    assert r.st_size > 15000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
-    assert r.st_size > 400000
-
-    assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True
-
-    base_output_dir = os.path.join(temp_output_dir, 'cli_test_02/auto')
-    filename = 'cli_test_02'
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
-    assert r.st_size > 5000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
-    assert r.st_size > 200000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
-    assert r.st_size > 15000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
-    assert r.st_size > 400000
-
-    assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.exists(os.path.join(base_output_dir, f'{filename}_content_list.json')) is True
-
-    # teardown
-    shutil.rmtree(temp_output_dir)
--- a/tests/unittest/test_tools/test_cli_dev.py
+++ b/tests/unittest/test_tools/test_cli_dev.py
-import os
-import shutil
-import tempfile
-
-from click.testing import CliRunner
-
-from magic_pdf.tools import cli_dev
-
-
-def test_cli_pdf():
-    # setup
-    unitest_dir = '/tmp/magic_pdf/unittest/tools'
-    filename = 'cli_test_01'
-    os.makedirs(unitest_dir, exist_ok=True)
-    temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
-
-    # run
-    runner = CliRunner()
-    result = runner.invoke(
-        cli_dev.cli,
-        [
-            'pdf',
-            '-p',
-            'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf',
-            '-j',
-            'tests/unittest/test_tools/assets/cli_dev/cli_test_01.model.json',
-            '-o',
-            temp_output_dir,
-        ],
-    )
-
-    # check
-    assert result.exit_code == 0
-
-    base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
-    assert r.st_size > 5000
-    r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
-    assert r.st_size > 7000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
-    assert r.st_size > 200000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
-    assert r.st_size > 15000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
-    assert r.st_size > 400000
-
-    assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
-
-    # teardown
-    shutil.rmtree(temp_output_dir)
-
-
-def test_cli_jsonl():
-    # setup
-    unitest_dir = '/tmp/magic_pdf/unittest/tools'
-    filename = 'cli_test_01'
-    os.makedirs(unitest_dir, exist_ok=True)
-    temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
-
-    def mock_read_s3_path(s3path):
-        with open(s3path, 'rb') as f:
-            return f.read()
-
-    cli_dev.read_s3_path = mock_read_s3_path  # mock
-
-    # run
-    runner = CliRunner()
-    result = runner.invoke(
-        cli_dev.cli,
-        [
-            'jsonl',
-            '-j',
-            'tests/unittest/test_tools/assets/cli_dev/cli_test_01.jsonl',
-            '-o',
-            temp_output_dir,
-        ],
-    )
-
-    # check
-    assert result.exit_code == 0
-
-    base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
-    assert r.st_size > 5000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
-    assert r.st_size > 7000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
-    assert r.st_size > 200000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
-    assert r.st_size > 15000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
-    assert r.st_size > 400000
-
-    assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
-    assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
-
-    # teardown
-    shutil.rmtree(temp_output_dir)
--- a/tests/unittest/test_tools/test_common.py
+++ b/tests/unittest/test_tools/test_common.py
-import os
-import shutil
-import tempfile
-
-import pytest
-
-from magic_pdf.tools.common import do_parse
-
-
-@pytest.mark.parametrize('method', ['auto', 'txt', 'ocr'])
-def test_common_do_parse(method):
-    import magic_pdf.model as model_config
-    model_config.__use_inside_model__ = True
-    # setup
-    unitest_dir = '/tmp/magic_pdf/unittest/tools'
-    filename = 'fake'
-    os.makedirs(unitest_dir, exist_ok=True)
-
-    temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
-
-    # run
-    with open('tests/unittest/test_tools/assets/common/cli_test_01.pdf', 'rb') as f:
-        bits = f.read()
-    do_parse(temp_output_dir,
-             filename,
-             bits, [],
-             method,
-             False,
-             f_dump_content_list=True)
-
-    # check
-    base_output_dir = os.path.join(temp_output_dir, f'fake/{method}')
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
-    assert r.st_size > 5000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
-    assert r.st_size > 7000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
-    assert r.st_size > 200000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
-    assert r.st_size > 15000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
-    assert r.st_size > 400000
-
-    r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
-    assert r.st_size > 400000
-
-    os.path.exists(os.path.join(base_output_dir, 'images'))
-    os.path.isdir(os.path.join(base_output_dir, 'images'))
-
-    # teardown
-    shutil.rmtree(temp_output_dir)
--- a/tests/unittest/test_unit.py
+++ b/tests/unittest/test_unit.py
-import os
-
-import pytest
-
-from magic_pdf.libs.boxbase import (__is_overlaps_y_exceeds_threshold,
-                                    _is_bottom_full_overlap, _is_in,
-                                    _is_in_or_part_overlap,
-                                    _is_in_or_part_overlap_with_area_ratio,
-                                    _is_left_overlap, _is_part_overlap,
-                                    _is_vertical_full_overlap, _left_intersect,
-                                    _right_intersect, bbox_distance,
-                                    bbox_relative_pos, calculate_iou,
-                                    calculate_overlap_area_2_minbox_area_ratio,
-                                    calculate_overlap_area_in_bbox1_area_ratio,
-                                    find_bottom_nearest_text_bbox,
-                                    find_left_nearest_text_bbox,
-                                    find_right_nearest_text_bbox,
-                                    find_top_nearest_text_bbox,
-                                    get_bbox_in_boundary,
-                                    get_minbox_if_overlap_by_ratio)
-from magic_pdf.libs.commons import get_top_percent_list, join_path, mymax
-from magic_pdf.libs.config_reader import get_s3_config
-from magic_pdf.libs.path_utils import parse_s3path
-
-
-# 输入一个列表，如果列表空返回0，否则返回最大元素
-@pytest.mark.parametrize('list_input, target_num',
-                         [
-                             ([0, 0, 0, 0], 0),
-                             ([0], 0),
-                             ([1, 2, 5, 8, 4], 8),
-                             ([], 0),
-                             ([1.1, 7.6, 1.009, 9.9], 9.9),
-                             ([1.0 * 10 ** 2, 3.5 * 10 ** 3, 0.9 * 10 ** 6], 0.9 * 10 ** 6),
-                         ])
-def test_list_max(list_input: list, target_num) -> None:
-    """
-    list_input: 输入列表元素，元素均为数字类型
-    """
-    assert target_num == mymax(list_input)
-
-
-# 连接多个参数生成路径信息，使用"/"作为连接符，生成的结果需要是一个合法路径
-@pytest.mark.parametrize('path_input, target_path', [
-    (['https:', '', 'www.baidu.com'], 'https://www.baidu.com'),
-    (['https:', 'www.baidu.com'], 'https:/www.baidu.com'),
-    (['D:', 'file', 'pythonProject', 'demo' + '.py'], 'D:/file/pythonProject/demo.py'),
-])
-def test_join_path(path_input: list, target_path: str) -> None:
-    """
-    path_input: 输入path的列表，列表元素均为字符串
-    """
-    assert target_path == join_path(*path_input)
-
-
-# 获取列表中前百分之多少的元素
-@pytest.mark.parametrize('num_list, percent, target_num_list', [
-    ([], 0.75, []),
-    ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0.8, [23, 9, 7, 3, 0, -1, -5, -7]),
-    ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11], 0, []),
-    ([-5, -10, 9, 3, 7, -7, 0, 23, -1, -11, 28], 0.8, [28, 23, 9, 7, 3, 0, -1, -5])
-])
-def test_get_top_percent_list(num_list: list, percent: float, target_num_list: list) -> None:
-    """
-    num_list: 数字列表，列表元素为数字
-    percent: 占比，float, 向下取证
-    """
-    assert target_num_list == get_top_percent_list(num_list, percent)
-
-
-# 输入一个s3路径，返回bucket名字和其余部分(key)
-@pytest.mark.parametrize('s3_path, target_data', [
-    ('s3://bucket/path/to/my/file.txt', 'bucket'),
-    ('s3a://bucket1/path/to/my/file2.txt', 'bucket1'),
-    # ("/path/to/my/file1.txt", "path"),
-    # ("bucket/path/to/my/file2.txt", "bucket"),
-])
-def test_parse_s3path(s3_path: str, target_data: str):
-    """
-    s3_path: s3路径
-        如果为无效路径，则返回对应的bucket名字和其余部分
-        如果为异常路径 例如：file2.txt，则报异常
-    """
-    bucket_name, key = parse_s3path(s3_path)
-    assert target_data == bucket_name
-
-
-# 2个box是否处于包含或者部分重合关系。
-# 如果某边界重合算重合。
-# 部分边界重合，其他在内部也算包含
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    ((120, 133, 223, 248), (128, 168, 269, 295), True),
-    ((137, 53, 245, 157), (134, 11, 200, 147), True),  # 部分重合
-    ((137, 56, 211, 116), (140, 66, 202, 199), True),  # 部分重合
-    ((42, 34, 69, 65), (42, 34, 69, 65), True),  # 部分重合
-    ((39, 63, 87, 106), (37, 66, 85, 109), True),  # 部分重合
-    ((13, 37, 55, 66), (7, 46, 49, 75), True),  # 部分重合
-    ((56, 83, 85, 104), (64, 85, 93, 106), True),  # 部分重合
-    ((12, 53, 48, 94), (14, 53, 50, 94), True),  # 部分重合
-    ((43, 54, 93, 131), (55, 82, 77, 106), True),  # 包含
-    ((63, 2, 134, 71), (72, 43, 104, 78), True),  # 包含
-    ((25, 57, 109, 127), (26, 73, 49, 95), True),  # 包含
-    ((24, 47, 111, 115), (34, 81, 58, 106), True),  # 包含
-    ((34, 8, 105, 83), (76, 20, 116, 45), True),  # 包含
-])
-def test_is_in_or_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    """
-    box1: 坐标数组
-    box2: 坐标数组
-    """
-    assert target_bool == _is_in_or_part_overlap(box1, box2)
-
-
-# 如果box1在box2内部，返回True
-#   如果是部分重合的，则重合面积占box1的比例大于阈值时候返回True
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    ((35, 28, 108, 90), (47, 60, 83, 96), False),  # 包含 box1 up box2,  box2 多半,box1少半
-    ((65, 151, 92, 177), (49, 99, 105, 198), True),  # 包含 box1 in box2
-    ((80, 62, 112, 84), (74, 40, 144, 111), True),  # 包含 box1 in box2
-    ((65, 88, 127, 144), (92, 102, 131, 139), False),  # 包含 box2 多半，box1约一半
-    ((92, 102, 131, 139), (65, 88, 127, 144), True),  # 包含 box1 多半
-    ((100, 93, 199, 168), (169, 126, 198, 165), False),  # 包含 box2 in box1
-    ((26, 75, 106, 172), (65, 108, 90, 128), False),  # 包含 box2 in box1
-    ((28, 90, 77, 126), (35, 84, 84, 120), True),  # 相交 box1多半，box2多半
-    ((37, 6, 69, 52), (28, 3, 60, 49), True),  # 相交 box1多半，box2多半
-    ((94, 29, 133, 60), (84, 30, 123, 61), True),  # 相交 box1多半，box2多半
-])
-def test_is_in_or_part_overlap_with_area_ratio(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    out_bool = _is_in_or_part_overlap_with_area_ratio(box1, box2)
-    assert target_bool == out_bool
-
-
-# box1在box2内部或者box2在box1内部返回True。如果部分边界重合也算作包含。
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    # ((), (), "Error"),  # Error
-    ((65, 151, 92, 177), (49, 99, 105, 198), True),  # 包含 box1 in box2
-    ((80, 62, 112, 84), (74, 40, 144, 111), True),  # 包含 box1 in box2
-    ((76, 140, 154, 277), (121, 326, 192, 384), False),  # 分离
-    ((65, 88, 127, 144), (92, 102, 131, 139), False),  # 包含 box2 多半，box1约一半
-    ((92, 102, 131, 139), (65, 88, 127, 144), False),  # 包含 box1 多半
-    ((68, 94, 118, 120), (68, 90, 118, 122), True),  # 包含，box1 in box2 两边x相切
-    ((69, 94, 118, 120), (68, 90, 118, 122), True),  # 包含，box1 in box2 一边x相切
-    ((69, 114, 118, 122), (68, 90, 118, 122), True),  # 包含，box1 in box2 一边y相切
-    # ((100, 93, 199, 168), (169, 126, 198, 165), True),  # 包含 box2 in box1  Error
-    # ((26, 75, 106, 172), (65, 108, 90, 128), True),  # 包含 box2 in box1  Error
-    # ((38, 94, 122, 120), (68, 94, 118, 120), True),  # 包含，box2 in box1 两边y相切 Error
-    # ((68, 34, 118, 158), (68, 94, 118, 120), True),  # 包含，box2 in box1 两边x相切 Error
-    # ((68, 34, 118, 158), (68, 94, 84, 120), True),  # 包含，box2 in box1 一边x相切 Error
-    # ((27, 94, 118, 158), (68, 94, 84, 120), True),  # 包含，box2 in box1 一边y相切 Error
-])
-def test_is_in(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _is_in(box1, box2)
-
-
-# 仅仅是部分包含关系，返回True，如果是完全包含关系则返回False
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    ((65, 151, 92, 177), (49, 99, 105, 198), False),  # 包含 box1 in box2
-    ((80, 62, 112, 84), (74, 40, 144, 111), False),  # 包含 box1 in box2
-    # ((76, 140, 154, 277), (121, 326, 192, 384), False),  # 分离  Error
-    ((76, 140, 154, 277), (121, 277, 192, 384), True),  # 外相切
-    ((65, 88, 127, 144), (92, 102, 131, 139), True),  # 包含 box2 多半，box1约一半
-    ((92, 102, 131, 139), (65, 88, 127, 144), True),  # 包含 box1 多半
-    ((68, 94, 118, 120), (68, 90, 118, 122), False),  # 包含，box1 in box2 两边x相切
-    ((69, 94, 118, 120), (68, 90, 118, 122), False),  # 包含，box1 in box2 一边x相切
-    ((69, 114, 118, 122), (68, 90, 118, 122), False),  # 包含，box1 in box2 一边y相切
-    # ((26, 75, 106, 172), (65, 108, 90, 128), False),  # 包含 box2 in box1  Error
-    # ((38, 94, 122, 120), (68, 94, 118, 120), False),  # 包含，box2 in box1 两边y相切 Error
-    # ((68, 34, 118, 158), (68, 94, 84, 120), False),  # 包含，box2 in box1 一边x相切 Error
-
-])
-def test_is_part_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _is_part_overlap(box1, box2)
-
-
-# left_box右侧是否和right_box左侧有部分重叠
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    (None, None, False),
-    ((88, 81, 222, 173), (60, 221, 123, 358), False),  # 分离
-    ((121, 149, 184, 289), (172, 130, 230, 268), True),  # box1 left bottom box2 相交
-    ((172, 130, 230, 268), (121, 149, 184, 289), False),  # box2 left bottom box1 相交
-    ((109, 68, 182, 146), (215, 188, 277, 253), False),  # box1 top left box2 分离
-    ((117, 53, 222, 176), (174, 142, 298, 276), True),  # box1 left top box2 相交
-    ((174, 142, 298, 276), (117, 53, 222, 176), False),  # box2 left top box1 相交
-    ((65, 88, 127, 144), (92, 102, 131, 139), True),  # box1 left box2 y:box2 in box1
-    ((92, 102, 131, 139), (65, 88, 127, 144), False),  # box2 left box1 y:box1 in box2
-    ((182, 130, 230, 268), (121, 149, 174, 289), False),  # box2 left box1 分离
-    ((1, 10, 26, 45), (3, 4, 20, 39), True),  # box1 bottom box2 x:box2 in box1
-])
-def test_left_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _left_intersect(box1, box2)
-
-
-# left_box左侧是否和right_box右侧部分重叠
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    (None, None, False),
-    ((88, 81, 222, 173), (60, 221, 123, 358), False),  # 分离
-    ((121, 149, 184, 289), (172, 130, 230, 268), False),  # box1 left bottom box2 相交
-    ((172, 130, 230, 268), (121, 149, 184, 289), True),  # box2 left bottom box1 相交
-    ((109, 68, 182, 146), (215, 188, 277, 253), False),  # box1 top left box2 分离
-    ((117, 53, 222, 176), (174, 142, 298, 276), False),  # box1 left top box2 相交
-    ((174, 142, 298, 276), (117, 53, 222, 176), True),  # box2 left top box1 相交
-    ((65, 88, 127, 144), (92, 102, 131, 139), False),  # box1 left box2 y:box2 in box1
-    # ((92, 102, 131, 139), (65, 88, 127, 144), True),  # box2 left box1 y:box1 in box2 Error
-    ((182, 130, 230, 268), (121, 149, 174, 289), False),  # box2 left box1 分离
-    # ((1, 10, 26, 45), (3, 4, 20, 39), False),  # box1 bottom box2 x:box2 in box1 Error
-])
-def test_right_intersect(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _right_intersect(box1, box2)
-
-
-# x方向上：要么box1包含box2, 要么box2包含box1。不能部分包含
-# y方向上：box1和box2有重叠
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    # (None, None, False),  # Error
-    ((35, 28, 108, 90), (47, 60, 83, 96), True),  # box1 top box2, x:box2 in box1, y:有重叠
-    ((35, 28, 98, 90), (27, 60, 103, 96), True),  # box1 top box2, x:box1 in box2, y:有重叠
-    ((57, 77, 130, 210), (59, 219, 119, 293), False),  # box1 top box2, x: box2 in box1, y:无重叠
-    ((47, 60, 83, 96), (35, 28, 108, 90), True),  # box2 top box1, x:box1 in box2, y:有重叠
-    ((27, 60, 103, 96), (35, 28, 98, 90), True),  # box2 top box1, x:box2 in box1, y:有重叠
-    ((59, 219, 119, 293), (57, 77, 130, 210), False),  # box2 top box1, x: box1 in box2, y:无重叠
-    ((35, 28, 55, 90), (57, 60, 83, 96), False),  # box1 top box2, x:无重叠, y:有重叠
-    ((47, 60, 63, 96), (65, 28, 108, 90), False),  # box2 top box1, x:无重叠, y:有重叠
-])
-def test_is_vertical_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _is_vertical_full_overlap(box1, box2)
-
-
-# 检查box1下方和box2的上方有轻微的重叠，轻微程度收到y_tolerance的限制
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    (None, None, False),
-    ((35, 28, 108, 90), (47, 89, 83, 116), True),  # box1 top box2, y:有重叠
-    ((35, 28, 108, 90), (47, 60, 83, 96), False),  # box1 top box2, y:有重叠且过多
-    ((57, 77, 130, 210), (59, 219, 119, 293), False),  # box1 top box2, y:无重叠
-    ((47, 60, 83, 96), (35, 28, 108, 90), False),  # box2 top box1, y:有重叠且过多
-    ((27, 89, 103, 116), (35, 28, 98, 90), False),  # box2 top box1, y:有重叠
-    ((59, 219, 119, 293), (57, 77, 130, 210), False),  # box2 top box1, y:无重叠
-])
-def test_is_bottom_full_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _is_bottom_full_overlap(box1, box2)
-
-
-# 检查box1的左侧是否和box2有重叠
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    (None, None, False),
-    ((88, 81, 222, 173), (60, 221, 123, 358), False),  # 分离
-    # ((121, 149, 184, 289), (172, 130, 230, 268), False),  # box1 left bottom box2 相交  Error
-    # ((172, 130, 230, 268), (121, 149, 184, 289), True),  # box2 left bottom box1 相交 Error
-    ((109, 68, 182, 146), (215, 188, 277, 253), False),  # box1 top left box2 分离
-    ((117, 53, 222, 176), (174, 142, 298, 276), False),  # box1 left top box2 相交
-    # ((174, 142, 298, 276), (117, 53, 222, 176), True),  # box2 left top box1 相交  Error
-    # ((65, 88, 127, 144), (92, 102, 131, 139), False),  # box1 left box2 y:box2 in box1 Error
-    ((1, 10, 26, 45), (3, 4, 20, 39), True),  # box1 middle bottom box2 x:box2 in box1
-
-])
-def test_is_left_overlap(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == _is_left_overlap(box1, box2)
-
-
-# 查两个bbox在y轴上是否有重叠，并且该重叠区域的高度占两个bbox高度更低的那个超过阈值
-@pytest.mark.parametrize('box1, box2, target_bool', [
-    # (None, None, "Error"),  # Error
-    ((51, 69, 192, 147), (75, 48, 132, 187), True),  # y: box1 in box2
-    ((51, 39, 192, 197), (75, 48, 132, 187), True),  # y: box2 in box1
-    ((88, 81, 222, 173), (60, 221, 123, 358), False),  # y: box1 top box2
-    ((109, 68, 182, 196), (215, 188, 277, 253), False),  # y: box1 top box2 little
-    ((109, 68, 182, 196), (215, 78, 277, 253), True),  # y: box1 top box2 more
-    ((109, 68, 182, 196), (215, 138, 277, 213), False),  # y: box1 top box2 more but lower overlap_ratio_threshold
-    ((109, 68, 182, 196), (215, 138, 277, 203), True),  # y: box1 top box2 more and more overlap_ratio_threshold
-])
-def test_is_overlaps_y_exceeds_threshold(box1: tuple, box2: tuple, target_bool: bool) -> None:
-    assert target_bool == __is_overlaps_y_exceeds_threshold(box1, box2)
-
-
-# Determine the coordinates of the intersection rectangle
-@pytest.mark.parametrize('box1, box2, target_num', [
-    # (None, None, "Error"),  # Error
-    ((88, 81, 222, 173), (60, 221, 123, 358), 0.0),  # 分离
-    ((76, 140, 154, 277), (121, 326, 192, 384), 0.0),  # 分离
-    ((142, 109, 238, 164), (134, 211, 224, 270), 0.0),  # 分离
-    ((109, 68, 182, 196), (175, 138, 277, 213), 0.024475524475524476),  # 相交
-    ((56, 90, 170, 219), (103, 212, 171, 304), 0.02288586346557361),  # 相交
-    ((109, 126, 204, 245), (130, 127, 232, 186), 0.33696071621517326),  # 相交
-    ((109, 126, 204, 245), (110, 127, 232, 206), 0.5493822593770807),  # 相交
-    ((76, 140, 154, 277), (121, 277, 192, 384), 0.0)  # 相切
-])
-def test_calculate_iou(box1: tuple, box2: tuple, target_num: float) -> None:
-    assert target_num == calculate_iou(box1, box2)
-
-
-# 计算box1和box2的重叠面积占最小面积的box的比例
-@pytest.mark.parametrize('box1, box2, target_num', [
-    # (None, None, "Error"),  # Error
-    ((142, 109, 238, 164), (134, 211, 224, 270), 0.0),  # 分离
-    ((88, 81, 222, 173), (60, 221, 123, 358), 0.0),  # 分离
-    ((76, 140, 154, 277), (121, 326, 192, 384), 0.0),  # 分离
-    ((76, 140, 154, 277), (121, 277, 192, 384), 0.0),  # 相切
-    ((109, 126, 204, 245), (110, 127, 232, 206), 0.7704918032786885),  # 相交
-    ((56, 90, 170, 219), (103, 212, 171, 304), 0.07496803069053709),  # 相交
-    ((121, 149, 184, 289), (172, 130, 230, 268), 0.17841079460269865),  # 相交
-    ((51, 69, 192, 147), (75, 48, 132, 187), 0.5611510791366906),  # 相交
-    ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075),  # 相交
-    ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417),  # 相交
-    ((109, 126, 204, 245), (130, 127, 232, 186), 0.7254901960784313),  # 相交
-])
-def test_calculate_overlap_area_2_minbox_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None:
-    assert target_num == calculate_overlap_area_2_minbox_area_ratio(box1, box2)
-
-
-# 计算box1和box2的重叠面积占bbox1的比例
-@pytest.mark.parametrize('box1, box2, target_num', [
-    # (None, None, "Error"),  # Error
-    ((142, 109, 238, 164), (134, 211, 224, 270), 0.0),  # 分离
-    ((88, 81, 222, 173), (60, 221, 123, 358), 0.0),  # 分离
-    ((76, 140, 154, 277), (121, 326, 192, 384), 0.0),  # 分离
-    ((76, 140, 154, 277), (121, 277, 192, 384), 0.0),  # 相切
-    ((142, 109, 238, 164), (134, 164, 224, 270), 0.0),  # 相切
-    ((109, 126, 204, 245), (110, 127, 232, 206), 0.6568774878372402),  # 相交
-    ((56, 90, 170, 219), (103, 212, 171, 304), 0.03189174486604107),  # 相交
-    ((121, 149, 184, 289), (172, 130, 230, 268), 0.1619047619047619),  # 相交
-    ((51, 69, 192, 147), (75, 48, 132, 187), 0.40425531914893614),  # 相交
-    ((117, 53, 222, 176), (174, 142, 298, 276), 0.12636469221835075),  # 相交
-    ((102, 60, 233, 203), (70, 190, 220, 319), 0.08188757807078417),  # 相交
-    ((109, 126, 204, 245), (130, 127, 232, 186), 0.38620079610791685),  # 相交
-])
-def test_calculate_overlap_area_in_bbox1_area_ratio(box1: tuple, box2: tuple, target_num: float) -> None:
-    assert target_num == calculate_overlap_area_in_bbox1_area_ratio(box1, box2)
-
-
-# 计算两个bbox重叠的面积占最小面积的box的比例，如果比例大于ratio，则返回小的那个bbox,否则返回None
-@pytest.mark.parametrize('box1, box2, ratio, target_box', [
-    # (None, None, 0.8, "Error"),  # Error
-    ((142, 109, 238, 164), (134, 211, 224, 270), 0.0, None),  # 分离
-    ((109, 126, 204, 245), (110, 127, 232, 206), 0.5, (110, 127, 232, 206)),
-    ((56, 90, 170, 219), (103, 212, 171, 304), 0.5, None),
-    ((121, 149, 184, 289), (172, 130, 230, 268), 0.5, None),
-    ((51, 69, 192, 147), (75, 48, 132, 187), 0.5, (75, 48, 132, 187)),
-    ((117, 53, 222, 176), (174, 142, 298, 276), 0.5, None),
-    ((102, 60, 233, 203), (70, 190, 220, 319), 0.5, None),
-    ((109, 126, 204, 245), (130, 127, 232, 186), 0.5, (130, 127, 232, 186)),
-])
-def test_get_minbox_if_overlap_by_ratio(box1: tuple, box2: tuple, ratio: float, target_box: list) -> None:
-    assert target_box == get_minbox_if_overlap_by_ratio(box1, box2, ratio)
-
-
-# 根据boundry获取在这个范围内的所有的box的列表，完全包含关系
-@pytest.mark.parametrize('boxes, boundary, target_boxs', [
-    # ([], (), "Error"),  # Error
-    ([], (110, 340, 209, 387), []),
-    ([(142, 109, 238, 164)], (134, 211, 224, 270), []),  # 分离
-    ([(109, 126, 204, 245), (110, 127, 232, 206)], (105, 116, 258, 300), [(109, 126, 204, 245), (110, 127, 232, 206)]),
-    ([(109, 126, 204, 245), (110, 127, 232, 206)], (105, 116, 258, 230), [(110, 127, 232, 206)]),
-    ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
-      (137, 29, 287, 87)], (80, 90, 249, 200), []),
-    ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
-      (137, 29, 287, 87)], (30, 20, 349, 320),
-     [(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
-      (137, 29, 287, 87)]),
-    ([(81, 280, 123, 315), (282, 203, 342, 247), (183, 100, 300, 155), (46, 99, 133, 148), (33, 156, 97, 211),
-      (137, 29, 287, 87)], (30, 20, 200, 320),
-     [(81, 280, 123, 315), (46, 99, 133, 148), (33, 156, 97, 211)]),
-])
-def test_get_bbox_in_boundary(boxes: list, boundary: tuple, target_boxs: list) -> None:
-    assert target_boxs == get_bbox_in_boundary(boxes, boundary)
-
-
-# 寻找上方距离最近的box,margin 4个单位， x方向有重合，y方向最近的
-@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [
-    ([{'bbox': (81, 280, 123, 315)}, {'bbox': (282, 203, 342, 247)}, {'bbox': (183, 100, 300, 155)},
-      {'bbox': (46, 99, 133, 148)}, {'bbox': (33, 156, 97, 211)},
-      {'bbox': (137, 29, 287, 87)}], (81, 280, 123, 315), {'bbox': (33, 156, 97, 211)}),
-    # ([{"bbox": (168, 120, 263, 159)},
-    #   {"bbox": (231, 61, 279, 159)},
-    #   {"bbox": (35, 85, 136, 110)},
-    #   {"bbox": (228, 193, 347, 225)},
-    #   {"bbox": (144, 264, 188, 323)},
-    #   {"bbox": (62, 37, 126, 64)}], (228, 193, 347, 225),
-    #  [{"bbox": (168, 120, 263, 159)}, {"bbox": (231, 61, 279, 159)}]),  # y：方向最近的有两个，x: 两个均有重合 Error
-    ([{'bbox': (35, 85, 136, 159)},
-      {'bbox': (168, 120, 263, 159)},
-      {'bbox': (231, 61, 279, 118)},
-      {'bbox': (228, 193, 347, 225)},
-      {'bbox': (144, 264, 188, 323)},
-      {'bbox': (62, 37, 126, 64)}], (228, 193, 347, 225),
-     {'bbox': (168, 120, 263, 159)},),  # y:方向最近的有两个，x:只有一个有重合
-    ([{'bbox': (239, 115, 379, 167)},
-      {'bbox': (33, 237, 104, 262)},
-      {'bbox': (124, 288, 168, 325)},
-      {'bbox': (242, 291, 379, 340)},
-      {'bbox': (55, 117, 121, 154)},
-      {'bbox': (266, 183, 384, 217)}, ], (124, 288, 168, 325), {'bbox': (55, 117, 121, 154)}),
-    ([{'bbox': (239, 115, 379, 167)},
-      {'bbox': (33, 237, 104, 262)},
-      {'bbox': (124, 288, 168, 325)},
-      {'bbox': (242, 291, 379, 340)},
-      {'bbox': (55, 117, 119, 154)},
-      {'bbox': (266, 183, 384, 217)}, ], (124, 288, 168, 325), None),  # x没有重合
-    ([{'bbox': (80, 90, 249, 200)},
-      {'bbox': (183, 100, 240, 155)}, ], (183, 100, 240, 155), None),  # 包含
-])
-def test_find_top_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
-    assert target_boxs == find_top_nearest_text_bbox(pymu_blocks, obj_box)
-
-
-# 寻找下方距离自己最近的box, x方向有重合，y方向最近的
-@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [
-    ([{'bbox': (165, 96, 300, 114)},
-      {'bbox': (11, 157, 139, 201)},
-      {'bbox': (124, 208, 265, 262)},
-      {'bbox': (124, 283, 248, 306)},
-      {'bbox': (39, 267, 84, 301)},
-      {'bbox': (36, 89, 114, 145)}, ], (165, 96, 300, 114), {'bbox': (124, 208, 265, 262)}),
-    ([{'bbox': (187, 37, 303, 49)},
-      {'bbox': (2, 227, 90, 283)},
-      {'bbox': (158, 174, 200, 212)},
-      {'bbox': (259, 174, 324, 228)},
-      {'bbox': (205, 61, 316, 97)},
-      {'bbox': (295, 248, 374, 287)}, ], (205, 61, 316, 97), {'bbox': (259, 174, 324, 228)}),  # y有两个最近的, x只有一个重合
-    # ([{"bbox": (187, 37, 303, 49)},
-    #   {"bbox": (2, 227, 90, 283)},
-    #   {"bbox": (259, 174, 324, 228)},
-    #   {"bbox": (205, 61, 316, 97)},
-    #   {"bbox": (295, 248, 374, 287)},
-    #   {"bbox": (158, 174, 209, 212)}, ], (205, 61, 316, 97),
-    #  [{"bbox": (259, 174, 324, 228)}, {"bbox": (158, 174, 209, 212)}]),  # x有重合，y有两个最近的  Error
-    ([{'bbox': (287, 132, 398, 191)},
-      {'bbox': (44, 141, 163, 188)},
-      {'bbox': (132, 191, 240, 241)},
-      {'bbox': (81, 25, 142, 67)},
-      {'bbox': (74, 297, 116, 314)},
-      {'bbox': (77, 84, 224, 107)}, ], (287, 132, 398, 191), None),  # x没有重合
-    ([{'bbox': (80, 90, 249, 200)},
-      {'bbox': (183, 100, 240, 155)}, ], (183, 100, 240, 155), None),  # 包含
-])
-def test_find_bottom_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
-    assert target_boxs == find_bottom_nearest_text_bbox(pymu_blocks, obj_box)
-
-
-# 寻找左侧距离自己最近的box, y方向有重叠，x方向最近
-@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [
-    ([{'bbox': (80, 90, 249, 200)}, {'bbox': (183, 100, 240, 155)}], (183, 100, 240, 155), None),  # 包含
-    ([{'bbox': (28, 90, 77, 126)}, {'bbox': (35, 84, 84, 120)}], (35, 84, 84, 120), None),  # y:重叠，x:重叠大于2
-    ([{'bbox': (28, 90, 77, 126)}, {'bbox': (75, 84, 134, 120)}], (75, 84, 134, 120), {'bbox': (28, 90, 77, 126)}),
-    # y:重叠，x:重叠小于等于2
-    ([{'bbox': (239, 115, 379, 167)},
-      {'bbox': (33, 237, 104, 262)},
-      {'bbox': (124, 288, 168, 325)},
-      {'bbox': (242, 291, 379, 340)},
-      {'bbox': (55, 113, 161, 154)},
-      {'bbox': (266, 123, 384, 217)}], (266, 123, 384, 217), {'bbox': (55, 113, 161, 154)}),  # y重叠，x left
-    # ([{"bbox": (136, 219, 268, 240)},
-    #   {"bbox": (169, 115, 268, 181)},
-    #   {"bbox": (33, 237, 104, 262)},
-    #   {"bbox": (124, 288, 168, 325)},
-    #   {"bbox": (55, 117, 161, 154)},
-    #   {"bbox": (266, 183, 384, 217)}], (266, 183, 384, 217),
-    #  [{"bbox": (136, 219, 267, 240)}, {"bbox": (169, 115, 267, 181)}]),  # y有重叠，x重叠小于2或者在left Error
-])
-def test_find_left_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
-    assert target_boxs == find_left_nearest_text_bbox(pymu_blocks, obj_box)
-
-
-# 寻找右侧距离自己最近的box, y方向有重叠，x方向最近
-@pytest.mark.parametrize('pymu_blocks, obj_box, target_boxs', [
-    ([{'bbox': (80, 90, 249, 200)}, {'bbox': (183, 100, 240, 155)}], (183, 100, 240, 155), None),  # 包含
-    ([{'bbox': (28, 90, 77, 126)}, {'bbox': (35, 84, 84, 120)}], (28, 90, 77, 126), None),  # y:重叠，x:重叠大于2
-    ([{'bbox': (28, 90, 77, 126)}, {'bbox': (75, 84, 134, 120)}], (28, 90, 77, 126), {'bbox': (75, 84, 134, 120)}),
-    # y:重叠，x:重叠小于等于2
-    ([{'bbox': (239, 115, 379, 167)},
-      {'bbox': (33, 237, 104, 262)},
-      {'bbox': (124, 288, 168, 325)},
-      {'bbox': (242, 291, 379, 340)},
-      {'bbox': (55, 113, 161, 154)},
-      {'bbox': (266, 123, 384, 217)}], (55, 113, 161, 154), {'bbox': (239, 115, 379, 167)}),  # y重叠，x right
-    # ([{"bbox": (169, 115, 298, 181)},
-    #   {"bbox": (169, 219, 268, 240)},
-    #   {"bbox": (33, 177, 104, 262)},
-    #   {"bbox": (124, 288, 168, 325)},
-    #   {"bbox": (55, 117, 161, 154)},
-    #   {"bbox": (266, 183, 384, 217)}], (33, 177, 104, 262),
-    #  [{"bbox": (169, 115, 298, 181)}, {"bbox": (169, 219, 268, 240)}]),  # y有重叠，x重叠小于2或者在right Error
-])
-def test_find_right_nearest_text_bbox(pymu_blocks: list, obj_box: tuple, target_boxs: dict) -> None:
-    assert target_boxs == find_right_nearest_text_bbox(pymu_blocks, obj_box)
-
-
-# 判断两个矩形框的相对位置关系 (left, right, bottom, top)
-@pytest.mark.parametrize('box1, box2, target_box', [
-    # (None, None, "Error"),  # Error
-    ((80, 90, 249, 200), (183, 100, 240, 155), (False, False, False, False)),  # 包含
-    # ((124, 81, 222, 173), (60, 221, 123, 358), (False, True, False, True)),  # 分离，右上 Error
-    ((142, 109, 238, 164), (134, 211, 224, 270), (False, False, False, True)),  # 分离，上
-    # ((51, 69, 192, 147), (205, 198, 282, 297), (True, False, False, True)),  # 分离，左上 Error
-    # ((101, 149, 164, 289), (172, 130, 230, 268), (True, False, False, False)),  # 分离，左  Error
-    # ((69, 196, 124, 285), (130, 127, 232, 186), (True, False, True, False)),  # 分离，左下  Error
-    ((103, 212, 171, 304), (56, 90, 170, 209), (False, False, True, False)),  # 分离，下
-    # ((124, 367, 222, 415), (60, 221, 123, 358), (False, True, True, False)),  # 分离，右下 Error
-    # ((172, 130, 230, 268), (101, 149, 164, 289), (False, True, False, False)),  # 分离，右  Error
-])
-def test_bbox_relative_pos(box1: tuple, box2: tuple, target_box: tuple) -> None:
-    assert target_box == bbox_relative_pos(box1, box2)
-
-
-# 计算两个矩形框的距离
-"""
-受bbox_relative_pos方法的影响，左右相反，这里计算结果全部受影响，在错误的基础上计算出了正确的结果
-"""
-
-
-@pytest.mark.parametrize('box1, box2, target_num', [
-    # (None, None, "Error"),  # Error
-    ((80, 90, 249, 200), (183, 100, 240, 155), 0.0),  # 包含
-    ((142, 109, 238, 164), (134, 211, 224, 270), 47.0),  # 分离，上
-    ((103, 212, 171, 304), (56, 90, 170, 209), 3.0),  # 分离，下
-    ((101, 149, 164, 289), (172, 130, 230, 268), 8.0),  # 分离，左
-    ((172, 130, 230, 268), (101, 149, 164, 289), 8.0),  # 分离，右
-    ((80.3, 90.8, 249.0, 200.5), (183.8, 100.6, 240.2, 155.1), 0.0),  # 包含
-    ((142.3, 109.5, 238.9, 164.2), (134.4, 211.2, 224.8, 270.1), 47.0),  # 分离，上
-    ((103.5, 212.6, 171.1, 304.8), (56.1, 90.9, 170.6, 209.2), 3.4),  # 分离，下
-    ((101.1, 149.3, 164.9, 289.0), (172.1, 130.1, 230.5, 268.5), 7.2),  # 分离，左
-    ((172.1, 130.3, 230.1, 268.1), (101.2, 149.9, 164.3, 289.1), 7.8),  # 分离，右
-    ((124.3, 81.1, 222.5, 173.8), (60.3, 221.5, 123.0, 358.9), 47.717711596429254),  # 分离，右上
-    ((51.2, 69.31, 192.5, 147.9), (205.0, 198.1, 282.98, 297.09), 51.73287156151299),  # 分离，左上
-    ((124.3, 367.1, 222.9, 415.7), (60.9, 221.4, 123.2, 358.6), 8.570880934886448),  # 分离，右下
-    ((69.9, 196.2, 124.1, 285.7), (130.0, 127.3, 232.6, 186.1), 11.69700816448377),  # 分离，左下
-])
-def test_bbox_distance(box1: tuple, box2: tuple, target_num: float) -> None:
-    assert target_num - bbox_distance(box1, box2) < 1
-
-
-@pytest.mark.skip(reason='skip')
-# 根据bucket_name获取s3配置ak,sk,endpoint
-def test_get_s3_config() -> None:
-    bucket_name = os.getenv('bucket_name')
-    target_data = os.getenv('target_data')
-    assert convert_string_to_list(target_data) == list(get_s3_config(bucket_name))
-
-
-def convert_string_to_list(s):
-    cleaned_s = s.strip("'")
-    items = cleaned_s.split(',')
-    cleaned_items = [item.strip() for item in items]
-    return cleaned_items