[ { "layout_dets": [ { "category_id": 0, "poly": [ 282.1632080078125, 156.2249755859375, 1416.6795654296875, 156.2249755859375, 1416.6795654296875, 313.81280517578125, 282.1632080078125, 313.81280517578125 ], "score": 0.999998927116394, "bbox": [ 101, 56, 510, 112 ] }, { "category_id": 1, "poly": [ 861.656982421875, 522.7763061523438, 1569.3853759765625, 522.7763061523438, 1569.3853759765625, 656.883544921875, 861.656982421875, 656.883544921875 ], "score": 0.9999970197677612, "bbox": [ 310, 188, 564, 236 ] }, { "category_id": 1, "poly": [ 131.8020782470703, 924.7362670898438, 838.9530639648438, 924.7362670898438, 838.9530639648438, 1323.7529296875, 131.8020782470703, 1323.7529296875 ], "score": 0.9999949932098389, "bbox": [ 47, 332, 302, 476 ] }, { "category_id": 1, "poly": [ 133.32005310058594, 1324.5035400390625, 839.2289428710938, 1324.5035400390625, 839.2289428710938, 1589.4503173828125, 133.32005310058594, 1589.4503173828125 ], "score": 0.999994158744812, "bbox": [ 47, 476, 302, 572 ] }, { "category_id": 1, "poly": [ 863.3811645507812, 1486.610107421875, 1569.2880859375, 1486.610107421875, 1569.2880859375, 1852.443603515625, 863.3811645507812, 1852.443603515625 ], "score": 0.9999936819076538, "bbox": [ 310, 535, 564, 666 ] }, { "category_id": 1, "poly": [ 862.9096069335938, 1187.8067626953125, 1568.2279052734375, 1187.8067626953125, 1568.2279052734375, 1486.08935546875, 862.9096069335938, 1486.08935546875 ], "score": 0.9999932050704956, "bbox": [ 310, 427, 564, 534 ] }, { "category_id": 1, "poly": [ 131.8186492919922, 1652.7752685546875, 837.5543823242188, 1652.7752685546875, 837.5543823242188, 2019.429443359375, 131.8186492919922, 2019.429443359375 ], "score": 0.9999901056289673, "bbox": [ 47, 594, 301, 726 ] }, { "category_id": 0, "poly": [ 375.1526794433594, 881.8807983398438, 594.3075561523438, 881.8807983398438, 594.3075561523438, 913.4786987304688, 375.1526794433594, 913.4786987304688 ], "score": 0.9999892115592957, "bbox": [ 135, 317, 213, 328 ] }, { "category_id": 2, "poly": [ 636.1867065429688, 2099.795654296875, 1063.7423095703125, 2099.795654296875, 1063.7423095703125, 2124.524169921875, 636.1867065429688, 2124.524169921875 ], "score": 0.9999860525131226, "bbox": [ 229, 755, 382, 764 ] }, { "category_id": 0, "poly": [ 375.91864013671875, 1610.209228515625, 592.8395385742188, 1610.209228515625, 592.8395385742188, 1641.5789794921875, 375.91864013671875, 1641.5789794921875 ], "score": 0.9999815821647644, "bbox": [ 135, 579, 213, 590 ] }, { "category_id": 4, "poly": [ 860.6583251953125, 995.6574096679688, 1569.622314453125, 995.6574096679688, 1569.622314453125, 1126.8409423828125, 860.6583251953125, 1126.8409423828125 ], "score": 0.9999815821647644, "bbox": [ 309, 358, 565, 405 ] }, { "category_id": 1, "poly": [ 443.1008605957031, 353.8008728027344, 1250.531494140625, 353.8008728027344, 1250.531494140625, 464.65576171875, 443.1008605957031, 464.65576171875 ], "score": 0.9999791979789734, "bbox": [ 159, 127, 450, 167 ] }, { "category_id": 1, "poly": [ 130.8282928466797, 523.2079467773438, 836.5639038085938, 523.2079467773438, 836.5639038085938, 862.0206909179688, 130.8282928466797, 862.0206909179688 ], "score": 0.9999784231185913, "bbox": [ 47, 188, 301, 310 ] }, { "category_id": 1, "poly": [ 862.6514282226562, 1851.426513671875, 1568.510498046875, 1851.426513671875, 1568.510498046875, 2017.93359375, 862.6514282226562, 2017.93359375 ], "score": 0.9999769926071167, "bbox": [ 310, 666, 564, 726 ] }, { "category_id": 3, "poly": [ 882.3795166015625, 685.376708984375, 1544.4088134765625, 685.376708984375, 1544.4088134765625, 969.22265625, 882.3795166015625, 969.22265625 ], "score": 0.9994785785675049, "bbox": [ 317, 246, 555, 348 ] }, { "category_id": 13, "poly": [ 1195, 1062, 1226, 1062, 1226, 1096, 1195, 1096 ], "score": 0.88, "latex": "d_{p}", "bbox": [ 430, 382, 441, 394 ] }, { "category_id": 13, "poly": [ 1304, 1030, 1327, 1030, 1327, 1061, 1304, 1061 ], "score": 0.65, "latex": "\\bar{\\bf p}", "bbox": [ 469, 370, 477, 381 ] }, { "category_id": 15, "poly": [ 344.0, 165.0, 1354.0, 172.0, 1353.0, 236.0, 344.0, 229.0 ], "score": 0.99, "text": "Real-time Temporal Stereo Matching", "bbox": [ 123, 59, 487, 84 ] }, { "category_id": 15, "poly": [ 293.0, 254.0, 1402.0, 254.0, 1402.0, 309.0, 293.0, 309.0 ], "score": 0.99, "text": "using Iterative Adaptive Support Weights", "bbox": [ 105, 91, 504, 111 ] }, { "category_id": 15, "poly": [ 864.0, 527.0, 1568.0, 527.0, 1568.0, 559.0, 864.0, 559.0 ], "score": 0.99, "text": "disparity map. Note that individual disparities can be converted", "bbox": [ 311, 189, 564, 201 ] }, { "category_id": 15, "poly": [ 864.0, 561.0, 1568.0, 561.0, 1568.0, 594.0, 864.0, 594.0 ], "score": 0.98, "text": "to actual depths if the geometry of the camera setup is", "bbox": [ 311, 201, 564, 213 ] }, { "category_id": 15, "poly": [ 859.0, 587.0, 1568.0, 591.0, 1568.0, 630.0, 859.0, 626.0 ], "score": 0.98, "text": " known, i.e., the stereo configuration of cameras has been pre-", "bbox": [ 309, 211, 564, 226 ] }, { "category_id": 15, "poly": [ 862.0, 626.0, 984.0, 626.0, 984.0, 658.0, 862.0, 658.0 ], "score": 1.0, "text": "calibrated.", "bbox": [ 310, 225, 354, 236 ] }, { "category_id": 15, "poly": [ 155.0, 921.0, 839.0, 924.0, 838.0, 963.0, 155.0, 960.0 ], "score": 0.98, "text": " Modern stereo matching algorithms achieve excellent results", "bbox": [ 55, 331, 301, 346 ] }, { "category_id": 15, "poly": [ 127.0, 956.0, 838.0, 958.0, 838.0, 997.0, 127.0, 995.0 ], "score": 0.98, "text": " on static stereo images, as demonstrated by the Middlebury", "bbox": [ 45, 344, 301, 358 ] }, { "category_id": 15, "poly": [ 132.0, 995.0, 836.0, 995.0, 836.0, 1027.0, 132.0, 1027.0 ], "score": 0.98, "text": "stereo performance benchmark [1], [2]. However, their ap-", "bbox": [ 47, 358, 300, 369 ] }, { "category_id": 15, "poly": [ 134.0, 1027.0, 834.0, 1027.0, 834.0, 1059.0, 134.0, 1059.0 ], "score": 1.0, "text": "plication to stereo video sequences does not guarantee inter-", "bbox": [ 48, 369, 300, 381 ] }, { "category_id": 15, "poly": [ 134.0, 1061.0, 836.0, 1061.0, 836.0, 1093.0, 134.0, 1093.0 ], "score": 0.99, "text": "frame consistency of matches extracted from subsequent stereo", "bbox": [ 48, 381, 300, 393 ] }, { "category_id": 15, "poly": [ 132.0, 1095.0, 838.0, 1095.0, 838.0, 1125.0, 132.0, 1125.0 ], "score": 0.99, "text": "frame pairs. The lack of temporal consistency of matches", "bbox": [ 47, 394, 301, 405 ] }, { "category_id": 15, "poly": [ 134.0, 1128.0, 836.0, 1128.0, 836.0, 1157.0, 134.0, 1157.0 ], "score": 1.0, "text": "between successive frames introduces spurious artifacts in the", "bbox": [ 48, 406, 300, 416 ] }, { "category_id": 15, "poly": [ 132.0, 1160.0, 836.0, 1160.0, 836.0, 1192.0, 132.0, 1192.0 ], "score": 0.99, "text": "resulting disparity maps. The problem of obtaining temporally", "bbox": [ 47, 417, 300, 429 ] }, { "category_id": 15, "poly": [ 132.0, 1194.0, 838.0, 1194.0, 838.0, 1226.0, 132.0, 1226.0 ], "score": 0.98, "text": "consistent sequences of disparity maps from video streams is", "bbox": [ 47, 429, 301, 441 ] }, { "category_id": 15, "poly": [ 134.0, 1228.0, 838.0, 1228.0, 838.0, 1260.0, 134.0, 1260.0 ], "score": 0.98, "text": "known as the temporal stereo correspondence problem, yet", "bbox": [ 48, 442, 301, 453 ] }, { "category_id": 15, "poly": [ 129.0, 1258.0, 841.0, 1260.0, 841.0, 1293.0, 129.0, 1290.0 ], "score": 0.98, "text": "the amount of research efforts oriented towards finding an", "bbox": [ 46, 452, 302, 465 ] }, { "category_id": 15, "poly": [ 134.0, 1292.0, 760.0, 1292.0, 760.0, 1325.0, 134.0, 1325.0 ], "score": 0.99, "text": "effective solution to this problem is surprisingly small.", "bbox": [ 48, 465, 273, 477 ] }, { "category_id": 15, "poly": [ 157.0, 1320.0, 836.0, 1322.0, 836.0, 1361.0, 157.0, 1359.0 ], "score": 0.98, "text": " A method is proposed for real-time temporal stereo match-", "bbox": [ 56, 475, 300, 489 ] }, { "category_id": 15, "poly": [ 134.0, 1361.0, 836.0, 1361.0, 836.0, 1393.0, 134.0, 1393.0 ], "score": 1.0, "text": "ing that efficiently propagates matching cost information be-", "bbox": [ 48, 489, 300, 501 ] }, { "category_id": 15, "poly": [ 134.0, 1393.0, 836.0, 1393.0, 836.0, 1425.0, 134.0, 1425.0 ], "score": 0.99, "text": "tween consecutive frames of a stereo video sequence. This", "bbox": [ 48, 501, 300, 513 ] }, { "category_id": 15, "poly": [ 132.0, 1423.0, 834.0, 1425.0, 834.0, 1458.0, 132.0, 1455.0 ], "score": 0.98, "text": "method is invariant to the number of prior frames being", "bbox": [ 47, 512, 300, 524 ] }, { "category_id": 15, "poly": [ 134.0, 1458.0, 836.0, 1458.0, 836.0, 1490.0, 134.0, 1490.0 ], "score": 0.99, "text": "considered, and can be easily incorporated into any local stereo", "bbox": [ 48, 524, 300, 536 ] }, { "category_id": 15, "poly": [ 132.0, 1492.0, 836.0, 1492.0, 836.0, 1524.0, 132.0, 1524.0 ], "score": 0.98, "text": "method based on edge-aware filters. The iterative adaptive", "bbox": [ 47, 537, 300, 548 ] }, { "category_id": 15, "poly": [ 132.0, 1526.0, 838.0, 1526.0, 838.0, 1558.0, 132.0, 1558.0 ], "score": 0.99, "text": "support matching algorithm presented in [3] serves as a", "bbox": [ 47, 549, 301, 560 ] }, { "category_id": 15, "poly": [ 132.0, 1558.0, 557.0, 1558.0, 557.0, 1590.0, 132.0, 1590.0 ], "score": 0.99, "text": "foundation for the proposed method.", "bbox": [ 47, 560, 200, 572 ] }, { "category_id": 15, "poly": [ 887.0, 1483.0, 1571.0, 1485.0, 1571.0, 1524.0, 887.0, 1522.0 ], "score": 0.98, "text": " In contrast, local methods, which are typically built upon", "bbox": [ 319, 533, 565, 548 ] }, { "category_id": 15, "poly": [ 859.0, 1517.0, 1573.0, 1519.0, 1573.0, 1558.0, 859.0, 1556.0 ], "score": 0.97, "text": " the Winner-Takes-All (WTA) framework, have the property of ", "bbox": [ 309, 546, 566, 560 ] }, { "category_id": 15, "poly": [ 864.0, 1556.0, 1566.0, 1556.0, 1566.0, 1588.0, 864.0, 1588.0 ], "score": 0.99, "text": "computational regularity and are thus suitable for implemen-", "bbox": [ 311, 560, 563, 571 ] }, { "category_id": 15, "poly": [ 862.0, 1588.0, 1566.0, 1588.0, 1566.0, 1620.0, 862.0, 1620.0 ], "score": 1.0, "text": "tation on parallel graphics hardware. Within the WTA frame-", "bbox": [ 310, 571, 563, 583 ] }, { "category_id": 15, "poly": [ 862.0, 1616.0, 1568.0, 1618.0, 1568.0, 1657.0, 862.0, 1655.0 ], "score": 0.98, "text": "work, local stereo algorithms consider a range of disparity", "bbox": [ 310, 581, 564, 596 ] }, { "category_id": 15, "poly": [ 864.0, 1655.0, 1566.0, 1655.0, 1566.0, 1687.0, 864.0, 1687.0 ], "score": 0.98, "text": "hypotheses and compute a volume of pixel-wise dissimilarity", "bbox": [ 311, 595, 563, 607 ] }, { "category_id": 15, "poly": [ 862.0, 1689.0, 1571.0, 1689.0, 1571.0, 1721.0, 862.0, 1721.0 ], "score": 0.99, "text": "metrics between the reference image and the matched image at", "bbox": [ 310, 608, 565, 619 ] }, { "category_id": 15, "poly": [ 862.0, 1723.0, 1568.0, 1721.0, 1568.0, 1753.0, 862.0, 1755.0 ], "score": 0.99, "text": "every considered disparity value. Final disparities are chosen", "bbox": [ 310, 620, 564, 631 ] }, { "category_id": 15, "poly": [ 864.0, 1755.0, 1568.0, 1755.0, 1568.0, 1785.0, 864.0, 1785.0 ], "score": 1.0, "text": "from the cost volume by traversing through its values and", "bbox": [ 311, 631, 564, 642 ] }, { "category_id": 15, "poly": [ 866.0, 1788.0, 1568.0, 1788.0, 1568.0, 1820.0, 866.0, 1820.0 ], "score": 0.99, "text": "selecting the disparities associated with minimum matching", "bbox": [ 311, 643, 564, 655 ] }, { "category_id": 15, "poly": [ 859.0, 1817.0, 1377.0, 1820.0, 1377.0, 1859.0, 859.0, 1856.0 ], "score": 0.98, "text": " costs for every pixel of the reference image.", "bbox": [ 309, 654, 495, 669 ] }, { "category_id": 15, "poly": [ 885.0, 1187.0, 1571.0, 1187.0, 1571.0, 1226.0, 885.0, 1226.0 ], "score": 0.97, "text": " In their excellent taxonomy paper [1], Scharstein and", "bbox": [ 318, 427, 565, 441 ] }, { "category_id": 15, "poly": [ 864.0, 1224.0, 1566.0, 1224.0, 1566.0, 1254.0, 864.0, 1254.0 ], "score": 0.99, "text": "Szeliski classify stereo algorithms as local or global meth-", "bbox": [ 311, 440, 563, 451 ] }, { "category_id": 15, "poly": [ 859.0, 1249.0, 1571.0, 1254.0, 1570.0, 1293.0, 859.0, 1288.0 ], "score": 0.99, "text": " ods. Global methods, which offer outstanding accuracy, are", "bbox": [ 309, 449, 565, 465 ] }, { "category_id": 15, "poly": [ 862.0, 1288.0, 1571.0, 1288.0, 1571.0, 1327.0, 862.0, 1327.0 ], "score": 0.98, "text": "typically derived from an energy minimization framework", "bbox": [ 310, 463, 565, 477 ] }, { "category_id": 15, "poly": [ 859.0, 1322.0, 1566.0, 1322.0, 1566.0, 1352.0, 859.0, 1352.0 ], "score": 0.99, "text": "that allows for explicit integration of disparity smoothness", "bbox": [ 309, 475, 563, 486 ] }, { "category_id": 15, "poly": [ 864.0, 1357.0, 1568.0, 1357.0, 1568.0, 1389.0, 864.0, 1389.0 ], "score": 0.99, "text": "constraints and thus is capable of regularizing the solution", "bbox": [ 311, 488, 564, 500 ] }, { "category_id": 15, "poly": [ 864.0, 1391.0, 1568.0, 1391.0, 1568.0, 1421.0, 864.0, 1421.0 ], "score": 1.0, "text": "in weakly textured areas. The minimization, however, is often", "bbox": [ 311, 500, 564, 511 ] }, { "category_id": 15, "poly": [ 864.0, 1423.0, 1568.0, 1423.0, 1568.0, 1455.0, 864.0, 1455.0 ], "score": 0.99, "text": "achieved using iterative methods or graph cuts, which do not", "bbox": [ 311, 512, 564, 523 ] }, { "category_id": 15, "poly": [ 864.0, 1458.0, 1418.0, 1458.0, 1418.0, 1487.0, 864.0, 1487.0 ], "score": 0.99, "text": "lend themselves well to parallel implementation.", "bbox": [ 311, 524, 510, 535 ] }, { "category_id": 15, "poly": [ 155.0, 1650.0, 839.0, 1652.0, 838.0, 1691.0, 155.0, 1689.0 ], "score": 0.97, "text": " Stereo matching is the process of identifying correspon-", "bbox": [ 55, 594, 301, 608 ] }, { "category_id": 15, "poly": [ 134.0, 1687.0, 838.0, 1687.0, 838.0, 1719.0, 134.0, 1719.0 ], "score": 0.99, "text": "dences between pixels in stereo images obtained using a", "bbox": [ 48, 607, 301, 618 ] }, { "category_id": 15, "poly": [ 132.0, 1723.0, 838.0, 1721.0, 838.0, 1753.0, 132.0, 1755.0 ], "score": 0.98, "text": "pair of synchronized cameras. These correspondences are", "bbox": [ 47, 620, 301, 631 ] }, { "category_id": 15, "poly": [ 134.0, 1755.0, 836.0, 1755.0, 836.0, 1788.0, 134.0, 1788.0 ], "score": 0.99, "text": "conveniently represented using the notion of disparity, i.e. the", "bbox": [ 48, 631, 300, 643 ] }, { "category_id": 15, "poly": [ 134.0, 1788.0, 836.0, 1788.0, 836.0, 1820.0, 134.0, 1820.0 ], "score": 1.0, "text": "positional offset between two matching pixels. It is assumed", "bbox": [ 48, 643, 300, 655 ] }, { "category_id": 15, "poly": [ 134.0, 1822.0, 836.0, 1822.0, 836.0, 1854.0, 134.0, 1854.0 ], "score": 0.99, "text": "that the stereo images are rectified, such that matching pixels", "bbox": [ 48, 655, 300, 667 ] }, { "category_id": 15, "poly": [ 132.0, 1854.0, 836.0, 1854.0, 836.0, 1886.0, 132.0, 1886.0 ], "score": 0.99, "text": "are confined within corresponding rows of the images and", "bbox": [ 47, 667, 300, 678 ] }, { "category_id": 15, "poly": [ 134.0, 1888.0, 838.0, 1888.0, 838.0, 1918.0, 134.0, 1918.0 ], "score": 1.0, "text": "thus disparities are restricted to the horizontal dimension, as", "bbox": [ 48, 679, 301, 690 ] }, { "category_id": 15, "poly": [ 134.0, 1920.0, 838.0, 1920.0, 838.0, 1952.0, 134.0, 1952.0 ], "score": 1.0, "text": "illustrated in Figure 1. For visualization purposes, disparities", "bbox": [ 48, 691, 301, 702 ] }, { "category_id": 15, "poly": [ 134.0, 1955.0, 838.0, 1955.0, 838.0, 1987.0, 134.0, 1987.0 ], "score": 0.99, "text": "recovered for every pixel of a reference image are stored", "bbox": [ 48, 703, 301, 715 ] }, { "category_id": 15, "poly": [ 129.0, 1985.0, 841.0, 1982.0, 841.0, 2021.0, 129.0, 2024.0 ], "score": 0.98, "text": "together in the form of an image, which is known as the", "bbox": [ 46, 714, 302, 727 ] }, { "category_id": 15, "poly": [ 370.0, 885.0, 594.0, 885.0, 594.0, 917.0, 370.0, 917.0 ], "score": 1.0, "text": "1. INTRODUCTION", "bbox": [ 133, 318, 213, 330 ] }, { "category_id": 15, "poly": [ 638.0, 2099.0, 1062.0, 2099.0, 1062.0, 2131.0, 638.0, 2131.0 ], "score": 0.98, "text": "978-1-4673-5208-6/13/$31.00 @2013 IEEE", "bbox": [ 229, 755, 382, 767 ] }, { "category_id": 15, "poly": [ 374.0, 1613.0, 591.0, 1613.0, 591.0, 1645.0, 374.0, 1645.0 ], "score": 0.95, "text": "II. BACKGROUND", "bbox": [ 134, 580, 212, 592 ] }, { "category_id": 15, "poly": [ 859.0, 992.0, 1571.0, 995.0, 1571.0, 1034.0, 859.0, 1031.0 ], "score": 0.99, "text": " Figure 1: Geometry of two horizontally aligned views where p", "bbox": [ 309, 357, 565, 372 ] }, { "category_id": 15, "poly": [ 864.0, 1098.0, 1291.0, 1098.0, 1291.0, 1130.0, 864.0, 1130.0 ], "score": 0.99, "text": "them along the horizontal dimension.", "bbox": [ 311, 395, 464, 406 ] }, { "category_id": 15, "poly": [ 859.0, 1061.0, 1194.0, 1059.0, 1194.0, 1098.0, 859.0, 1100.0 ], "score": 0.98, "text": " pixel in the target frame, and", "bbox": [ 309, 381, 429, 395 ] }, { "category_id": 15, "poly": [ 1227.0, 1061.0, 1571.0, 1059.0, 1571.0, 1098.0, 1227.0, 1100.0 ], "score": 0.97, "text": " denotes the disparity between", "bbox": [ 441, 381, 565, 395 ] }, { "category_id": 15, "poly": [ 864.0, 1034.0, 1303.0, 1034.0, 1303.0, 1063.0, 864.0, 1063.0 ], "score": 0.99, "text": "denotes a pixel in the reference frame,", "bbox": [ 311, 372, 469, 382 ] }, { "category_id": 15, "poly": [ 1328.0, 1034.0, 1566.0, 1034.0, 1566.0, 1063.0, 1328.0, 1063.0 ], "score": 0.96, "text": " denotes its matching", "bbox": [ 478, 372, 563, 382 ] }, { "category_id": 15, "poly": [ 508.0, 357.0, 1194.0, 360.0, 1194.0, 392.0, 508.0, 390.0 ], "score": 0.98, "text": "Jedrzej Kowalczuk, Eric T. Psota, and Lance C. Pérez", "bbox": [ 182, 128, 429, 141 ] }, { "category_id": 15, "poly": [ 443.0, 392.0, 1245.0, 392.0, 1245.0, 424.0, 443.0, 424.0 ], "score": 0.99, "text": "Department of Electrical Engineering, University of Nebraska-Lincoln", "bbox": [ 159, 141, 448, 152 ] }, { "category_id": 15, "poly": [ 614.0, 435.0, 1081.0, 435.0, 1081.0, 465.0, 614.0, 465.0 ], "score": 0.99, "text": "[jkowalczuk2,epsota,lperez] @unl.edu", "bbox": [ 221, 156, 389, 167 ] }, { "category_id": 15, "poly": [ 159.0, 527.0, 836.0, 527.0, 836.0, 559.0, 159.0, 559.0 ], "score": 0.98, "text": "Abstract-Stereo matching algorithms are nearly always de-", "bbox": [ 57, 189, 300, 201 ] }, { "category_id": 15, "poly": [ 132.0, 555.0, 838.0, 555.0, 838.0, 587.0, 132.0, 587.0 ], "score": 0.98, "text": "signed to find matches between a single pair of images. A method", "bbox": [ 47, 199, 301, 211 ] }, { "category_id": 15, "poly": [ 134.0, 580.0, 836.0, 580.0, 836.0, 612.0, 134.0, 612.0 ], "score": 1.0, "text": "is presented that was specifically designed to operate on sequences", "bbox": [ 48, 208, 300, 220 ] }, { "category_id": 15, "poly": [ 132.0, 605.0, 838.0, 607.0, 838.0, 646.0, 132.0, 644.0 ], "score": 0.99, "text": "of images. This method considers the cost of matching image", "bbox": [ 47, 217, 301, 232 ] }, { "category_id": 15, "poly": [ 132.0, 637.0, 838.0, 637.0, 838.0, 669.0, 132.0, 669.0 ], "score": 0.98, "text": "points in both the spatial and temporal domain. To maintain", "bbox": [ 47, 229, 301, 240 ] }, { "category_id": 15, "poly": [ 134.0, 667.0, 838.0, 667.0, 838.0, 699.0, 134.0, 699.0 ], "score": 0.97, "text": "real-time operation, a temporal cost aggregation method is used", "bbox": [ 48, 240, 301, 251 ] }, { "category_id": 15, "poly": [ 132.0, 692.0, 836.0, 692.0, 836.0, 722.0, 132.0, 722.0 ], "score": 0.98, "text": "to evaluate the likelihood of matches that is invariant with respect", "bbox": [ 47, 249, 300, 259 ] }, { "category_id": 15, "poly": [ 127.0, 717.0, 841.0, 715.0, 841.0, 754.0, 127.0, 756.0 ], "score": 0.97, "text": "to the number of prior images being considered. This method", "bbox": [ 45, 258, 302, 271 ] }, { "category_id": 15, "poly": [ 127.0, 742.0, 841.0, 745.0, 841.0, 784.0, 127.0, 781.0 ], "score": 0.98, "text": "has been implemented on massively parallel GPU hardware,", "bbox": [ 45, 267, 302, 282 ] }, { "category_id": 15, "poly": [ 132.0, 777.0, 838.0, 777.0, 838.0, 809.0, 132.0, 809.0 ], "score": 0.99, "text": "and the implementation ranks as one of the fastest and most", "bbox": [ 47, 279, 301, 291 ] }, { "category_id": 15, "poly": [ 132.0, 802.0, 838.0, 804.0, 838.0, 836.0, 132.0, 834.0 ], "score": 0.99, "text": "accurate real-time stereo matching methods as measured by the", "bbox": [ 47, 288, 301, 300 ] }, { "category_id": 15, "poly": [ 134.0, 830.0, 619.0, 830.0, 619.0, 862.0, 134.0, 862.0 ], "score": 0.99, "text": "Middlebury stereo performance benchmark.", "bbox": [ 48, 298, 222, 310 ] }, { "category_id": 15, "poly": [ 887.0, 1849.0, 1568.0, 1852.0, 1568.0, 1891.0, 887.0, 1888.0 ], "score": 0.99, "text": " Disparity maps obtained using this simple strategy are often", "bbox": [ 319, 665, 564, 680 ] }, { "category_id": 15, "poly": [ 862.0, 1888.0, 1568.0, 1888.0, 1568.0, 1920.0, 862.0, 1920.0 ], "score": 0.98, "text": "too noisy to be considered useable. To reduce the effects", "bbox": [ 310, 679, 564, 691 ] }, { "category_id": 15, "poly": [ 864.0, 1923.0, 1568.0, 1923.0, 1568.0, 1952.0, 864.0, 1952.0 ], "score": 0.99, "text": "of noise and enforce spatial consistency of matches, local", "bbox": [ 311, 692, 564, 702 ] }, { "category_id": 15, "poly": [ 862.0, 1948.0, 1568.0, 1950.0, 1568.0, 1989.0, 861.0, 1987.0 ], "score": 0.99, "text": "stereo algorithms consider arbitrarily shaped and sized support", "bbox": [ 310, 701, 564, 716 ] }, { "category_id": 15, "poly": [ 864.0, 1989.0, 1568.0, 1989.0, 1568.0, 2021.0, 864.0, 2021.0 ], "score": 0.99, "text": "windows centered at each pixel of the reference image, and", "bbox": [ 311, 716, 564, 727 ] } ], "page_info": { "page_no": 0, "height": 2200, "width": 1700 } }, { "layout_dets": [ { "category_id": 8, "poly": [ 962.3624267578125, 1513.2073974609375, 1465.4017333984375, 1513.2073974609375, 1465.4017333984375, 1669.1397705078125, 962.3624267578125, 1669.1397705078125 ], "score": 0.9999995231628418, "bbox": [ 346, 544, 527, 600 ] }, { "category_id": 9, "poly": [ 1530.72998046875, 1101.879638671875, 1565.2568359375, 1101.879638671875, 1565.2568359375, 1130.8609619140625, 1530.72998046875, 1130.8609619140625 ], "score": 0.9999992251396179, "bbox": [ 551, 396, 563, 407 ] }, { "category_id": 9, "poly": [ 1529.8787841796875, 1575.843505859375, 1565.931396484375, 1575.843505859375, 1565.931396484375, 1607.2161865234375, 1529.8787841796875, 1607.2161865234375 ], "score": 0.9999987483024597, "bbox": [ 550, 567, 563, 578 ] }, { "category_id": 1, "poly": [ 865.1971435546875, 1684.040283203125, 1566.561279296875, 1684.040283203125, 1566.561279296875, 1813.7021484375, 865.1971435546875, 1813.7021484375 ], "score": 0.9999987483024597, "bbox": [ 311, 606, 563, 652 ] }, { "category_id": 9, "poly": [ 1530.5263671875, 1839.3990478515625, 1565.1201171875, 1839.3990478515625, 1565.1201171875, 1869.825439453125, 1530.5263671875, 1869.825439453125 ], "score": 0.9999977946281433, "bbox": [ 550, 662, 563, 673 ] }, { "category_id": 8, "poly": [ 972.3255004882812, 1075.85498046875, 1461.2088623046875, 1075.85498046875, 1461.2088623046875, 1155.465087890625, 972.3255004882812, 1155.465087890625 ], "score": 0.999996542930603, "bbox": [ 350, 387, 526, 415 ] }, { "category_id": 1, "poly": [ 865.4874267578125, 158.47100830078125, 1565.84375, 158.47100830078125, 1565.84375, 355.3230285644531, 865.4874267578125, 355.3230285644531 ], "score": 0.9999960660934448, "bbox": [ 311, 57, 563, 127 ] }, { "category_id": 1, "poly": [ 133.51382446289062, 158.21670532226562, 835.5382080078125, 158.21670532226562, 835.5382080078125, 558.8020629882812, 133.51382446289062, 558.8020629882812 ], "score": 0.9999951124191284, "bbox": [ 48, 56, 300, 201 ] }, { "category_id": 1, "poly": [ 134.01239013671875, 954.4151000976562, 836.1470336914062, 954.4151000976562, 836.1470336914062, 1618.77197265625, 134.01239013671875, 1618.77197265625 ], "score": 0.9999947547912598, "bbox": [ 48, 343, 301, 582 ] }, { "category_id": 1, "poly": [ 134.4542999267578, 558.8201904296875, 834.2548828125, 558.8201904296875, 834.2548828125, 954.7811279296875, 134.4542999267578, 954.7811279296875 ], "score": 0.9999943971633911, "bbox": [ 48, 201, 300, 343 ] }, { "category_id": 1, "poly": [ 866.33642578125, 421.84442138671875, 1566.451904296875, 421.84442138671875, 1566.451904296875, 787.1864624023438, 866.33642578125, 787.1864624023438 ], "score": 0.9999930262565613, "bbox": [ 311, 151, 563, 283 ] }, { "category_id": 1, "poly": [ 864.974853515625, 1167.92236328125, 1567.0927734375, 1167.92236328125, 1567.0927734375, 1298.29541015625, 864.974853515625, 1298.29541015625 ], "score": 0.9999929666519165, "bbox": [ 311, 420, 564, 467 ] }, { "category_id": 1, "poly": [ 864.5220947265625, 853.943359375, 1565.82080078125, 853.943359375, 1565.82080078125, 1080.8125, 864.5220947265625, 1080.8125 ], "score": 0.9999923706054688, "bbox": [ 311, 307, 563, 389 ] }, { "category_id": 1, "poly": [ 865.4466552734375, 1919.30615234375, 1566.4720458984375, 1919.30615234375, 1566.4720458984375, 2017.154541015625, 865.4466552734375, 2017.154541015625 ], "score": 0.9999904036521912, "bbox": [ 311, 690, 563, 726 ] }, { "category_id": 1, "poly": [ 864.801513671875, 1302.438232421875, 1566.760986328125, 1302.438232421875, 1566.760986328125, 1498.9681396484375, 864.801513671875, 1498.9681396484375 ], "score": 0.9999889135360718, "bbox": [ 311, 468, 564, 539 ] }, { "category_id": 1, "poly": [ 133.34628295898438, 1620.0596923828125, 836.7553100585938, 1620.0596923828125, 836.7553100585938, 2018.44873046875, 133.34628295898438, 2018.44873046875 ], "score": 0.9999861717224121, "bbox": [ 48, 583, 301, 726 ] }, { "category_id": 0, "poly": [ 865.5296020507812, 809.8997802734375, 1302.7711181640625, 809.8997802734375, 1302.7711181640625, 841.3140869140625, 865.5296020507812, 841.3140869140625 ], "score": 0.9999798536300659, "bbox": [ 311, 291, 468, 302 ] }, { "category_id": 0, "poly": [ 1131.11181640625, 378.66229248046875, 1299.6181640625, 378.66229248046875, 1299.6181640625, 409.04852294921875, 1131.11181640625, 409.04852294921875 ], "score": 0.9999651908874512, "bbox": [ 407, 136, 467, 147 ] }, { "category_id": 8, "poly": [ 1003.5569458007812, 1824.2362060546875, 1420.7132568359375, 1824.2362060546875, 1420.7132568359375, 1905.175048828125, 1003.5569458007812, 1905.175048828125 ], "score": 0.999914288520813, "bbox": [ 361, 656, 511, 685 ] }, { "category_id": 14, "poly": [ 974, 1076, 1454, 1076, 1454, 1155, 974, 1155 ], "score": 0.94, "latex": "w(p,q)=\\exp{\\left(-\\frac{\\Delta_{g}(p,q)}{\\gamma_{g}}-\\frac{\\Delta_{c}(p,q)}{\\gamma_{c}}\\right)},", "bbox": [ 350, 387, 523, 415 ] }, { "category_id": 14, "poly": [ 1006, 1825, 1423, 1825, 1423, 1907, 1006, 1907 ], "score": 0.94, "latex": "\\delta(q,\\bar{q})=\\sum_{c=\\{r,g,b\\}}\\operatorname*{min}(|q_{c}-\\bar{q}_{c}|,\\tau).", "bbox": [ 362, 657, 512, 686 ] }, { "category_id": 14, "poly": [ 963, 1510, 1464, 1510, 1464, 1671, 963, 1671 ], "score": 0.93, "latex": "C(p,\\bar{p})=\\frac{\\displaystyle\\sum_{q\\in\\Omega_{p},\\bar{q}\\in\\Omega_{\\bar{p}}}w(p,q)w(\\bar{p},\\bar{q})\\delta(q,\\bar{q})}{\\displaystyle\\sum_{q\\in\\Omega_{p},\\bar{q}\\in\\Omega_{\\bar{p}}}w(p,q)w(\\bar{p},\\bar{q})}\\,,", "bbox": [ 346, 543, 527, 601 ] }, { "category_id": 13, "poly": [ 1335, 1166, 1432, 1166, 1432, 1200, 1335, 1200 ], "score": 0.93, "latex": "\\Delta_{c}(p,q)", "bbox": [ 480, 419, 515, 432 ] }, { "category_id": 13, "poly": [ 939, 1166, 1039, 1166, 1039, 1201, 939, 1201 ], "score": 0.93, "latex": "\\Delta_{g}(p,q)", "bbox": [ 338, 419, 374, 432 ] }, { "category_id": 13, "poly": [ 1289, 1683, 1365, 1683, 1365, 1717, 1289, 1717 ], "score": 0.93, "latex": "\\delta(q,\\bar{q})", "bbox": [ 464, 605, 491, 618 ] }, { "category_id": 13, "poly": [ 1362, 1367, 1441, 1367, 1441, 1401, 1362, 1401 ], "score": 0.92, "latex": "\\bar{p}\\in S_{p}", "bbox": [ 490, 492, 518, 504 ] }, { "category_id": 13, "poly": [ 864, 1019, 951, 1019, 951, 1053, 864, 1053 ], "score": 0.92, "latex": "q\\in\\Omega_{p}", "bbox": [ 311, 366, 342, 379 ] }, { "category_id": 13, "poly": [ 1351, 953, 1388, 953, 1388, 987, 1351, 987 ], "score": 0.9, "latex": "\\Omega_{p}", "bbox": [ 486, 343, 499, 355 ] }, { "category_id": 13, "poly": [ 913, 1467, 949, 1467, 949, 1501, 913, 1501 ], "score": 0.89, "latex": "\\Omega_{\\bar{p}}", "bbox": [ 328, 528, 341, 540 ] }, { "category_id": 13, "poly": [ 1531, 1367, 1565, 1367, 1565, 1401, 1531, 1401 ], "score": 0.89, "latex": "S_{p}", "bbox": [ 551, 492, 563, 504 ] }, { "category_id": 13, "poly": [ 1528, 1434, 1565, 1434, 1565, 1468, 1528, 1468 ], "score": 0.89, "latex": "\\Omega_{p}", "bbox": [ 550, 516, 563, 528 ] }, { "category_id": 13, "poly": [ 1485, 1205, 1516, 1205, 1516, 1234, 1485, 1234 ], "score": 0.88, "latex": "\\gamma_{g}", "bbox": [ 534, 433, 545, 444 ] }, { "category_id": 13, "poly": [ 1159, 1206, 1178, 1206, 1178, 1233, 1159, 1233 ], "score": 0.82, "latex": "p", "bbox": [ 417, 434, 424, 443 ] }, { "category_id": 13, "poly": [ 863, 1238, 893, 1238, 893, 1266, 863, 1266 ], "score": 0.82, "latex": "\\gamma_{c}", "bbox": [ 310, 445, 321, 455 ] }, { "category_id": 13, "poly": [ 1177, 1436, 1196, 1436, 1196, 1465, 1177, 1465 ], "score": 0.8, "latex": "\\bar{p}", "bbox": [ 423, 516, 430, 527 ] }, { "category_id": 13, "poly": [ 1371, 1024, 1391, 1024, 1391, 1051, 1371, 1051 ], "score": 0.8, "latex": "p", "bbox": [ 493, 368, 500, 378 ] }, { "category_id": 13, "poly": [ 1540, 1406, 1558, 1406, 1558, 1432, 1540, 1432 ], "score": 0.8, "latex": "p", "bbox": [ 554, 506, 560, 515 ] }, { "category_id": 13, "poly": [ 1447, 1024, 1465, 1024, 1465, 1051, 1447, 1051 ], "score": 0.79, "latex": "q", "bbox": [ 520, 368, 527, 378 ] }, { "category_id": 13, "poly": [ 1101, 1437, 1121, 1437, 1121, 1465, 1101, 1465 ], "score": 0.79, "latex": "p", "bbox": [ 396, 517, 403, 527 ] }, { "category_id": 13, "poly": [ 1389, 1307, 1407, 1307, 1407, 1332, 1389, 1332 ], "score": 0.79, "latex": "p", "bbox": [ 500, 470, 506, 479 ] }, { "category_id": 13, "poly": [ 1230, 1206, 1247, 1206, 1247, 1233, 1230, 1233 ], "score": 0.78, "latex": "q", "bbox": [ 442, 434, 448, 443 ] }, { "category_id": 13, "poly": [ 1029, 1372, 1048, 1372, 1048, 1399, 1029, 1399 ], "score": 0.78, "latex": "p", "bbox": [ 370, 493, 377, 503 ] }, { "category_id": 13, "poly": [ 916, 1752, 934, 1752, 934, 1782, 916, 1782 ], "score": 0.76, "latex": "\\bar{q}", "bbox": [ 329, 630, 336, 641 ] }, { "category_id": 13, "poly": [ 1407, 1925, 1425, 1925, 1425, 1946, 1407, 1946 ], "score": 0.75, "latex": "\\tau", "bbox": [ 506, 693, 513, 700 ] }, { "category_id": 13, "poly": [ 1548, 1722, 1565, 1722, 1565, 1749, 1548, 1749 ], "score": 0.75, "latex": "q", "bbox": [ 557, 619, 563, 629 ] }, { "category_id": 13, "poly": [ 1050, 992, 1068, 992, 1068, 1018, 1050, 1018 ], "score": 0.75, "latex": "p", "bbox": [ 378, 357, 384, 366 ] }, { "category_id": 15, "poly": [ 864.0, 1783.0, 1298.0, 1783.0, 1298.0, 1822.0, 864.0, 1822.0 ], "score": 0.99, "text": "green, and blue components given by", "bbox": [ 311, 641, 467, 655 ] }, { "category_id": 15, "poly": [ 866.0, 1687.0, 1288.0, 1687.0, 1288.0, 1719.0, 866.0, 1719.0 ], "score": 0.96, "text": "where the pixel dissimilarity metric", "bbox": [ 311, 607, 463, 618 ] }, { "category_id": 15, "poly": [ 1366.0, 1687.0, 1564.0, 1687.0, 1564.0, 1719.0, 1366.0, 1719.0 ], "score": 0.97, "text": "ischosen as the", "bbox": [ 491, 607, 563, 618 ] }, { "category_id": 15, "poly": [ 866.0, 1751.0, 915.0, 1751.0, 915.0, 1783.0, 866.0, 1783.0 ], "score": 1.0, "text": "and", "bbox": [ 311, 630, 329, 641 ] }, { "category_id": 15, "poly": [ 935.0, 1751.0, 1564.0, 1751.0, 1564.0, 1783.0, 935.0, 1783.0 ], "score": 0.98, "text": ". Here, the truncation of color difference for the red,", "bbox": [ 336, 630, 563, 641 ] }, { "category_id": 15, "poly": [ 866.0, 1719.0, 1547.0, 1719.0, 1547.0, 1749.0, 866.0, 1749.0 ], "score": 0.99, "text": "sum of truncated absolute color differences between pixels", "bbox": [ 311, 618, 556, 629 ] }, { "category_id": 15, "poly": [ 864.0, 163.0, 1568.0, 163.0, 1568.0, 192.0, 864.0, 192.0 ], "score": 1.0, "text": "temporal information, making it possible to process a temporal", "bbox": [ 311, 58, 564, 69 ] }, { "category_id": 15, "poly": [ 859.0, 188.0, 1571.0, 193.0, 1570.0, 229.0, 859.0, 225.0 ], "score": 0.99, "text": " collection of cost volumes. The filtering operation was shown", "bbox": [ 309, 67, 565, 82 ] }, { "category_id": 15, "poly": [ 864.0, 229.0, 1566.0, 229.0, 1566.0, 261.0, 864.0, 261.0 ], "score": 0.99, "text": "to preserve spatio-temporal edges present in the cost volumes,", "bbox": [ 311, 82, 563, 93 ] }, { "category_id": 15, "poly": [ 859.0, 261.0, 1564.0, 264.0, 1564.0, 296.0, 859.0, 293.0 ], "score": 0.98, "text": " resulting in increased temporal consistency of disparity maps,", "bbox": [ 309, 93, 563, 106 ] }, { "category_id": 15, "poly": [ 864.0, 296.0, 1566.0, 296.0, 1566.0, 328.0, 864.0, 328.0 ], "score": 0.99, "text": "greater robustness to image noise, and more accurate behavior", "bbox": [ 311, 106, 563, 118 ] }, { "category_id": 15, "poly": [ 866.0, 328.0, 1160.0, 328.0, 1160.0, 360.0, 866.0, 360.0 ], "score": 1.0, "text": "around object boundaries.", "bbox": [ 311, 118, 417, 129 ] }, { "category_id": 15, "poly": [ 129.0, 158.0, 841.0, 153.0, 841.0, 192.0, 130.0, 197.0 ], "score": 0.99, "text": "aggregate cost values within the pixel neighborhoods defined", "bbox": [ 46, 56, 302, 69 ] }, { "category_id": 15, "poly": [ 129.0, 188.0, 841.0, 190.0, 841.0, 229.0, 129.0, 227.0 ], "score": 0.99, "text": "by these windows. In 2005, Yoon and Kweon [4] proposed", "bbox": [ 46, 67, 302, 82 ] }, { "category_id": 15, "poly": [ 132.0, 229.0, 838.0, 229.0, 838.0, 261.0, 132.0, 261.0 ], "score": 1.0, "text": "an adaptive matching cost aggregation scheme, which assigns", "bbox": [ 47, 82, 301, 93 ] }, { "category_id": 15, "poly": [ 132.0, 261.0, 838.0, 261.0, 838.0, 293.0, 132.0, 293.0 ], "score": 0.98, "text": "a weight value to every pixel located in the support window", "bbox": [ 47, 93, 301, 105 ] }, { "category_id": 15, "poly": [ 132.0, 293.0, 838.0, 293.0, 838.0, 325.0, 132.0, 325.0 ], "score": 0.98, "text": "of a given pixel of interest. The weight value is based on", "bbox": [ 47, 105, 301, 117 ] }, { "category_id": 15, "poly": [ 132.0, 328.0, 836.0, 328.0, 836.0, 360.0, 132.0, 360.0 ], "score": 0.99, "text": "the spatial and color similarity between the pixel of interest", "bbox": [ 47, 118, 300, 129 ] }, { "category_id": 15, "poly": [ 134.0, 360.0, 836.0, 360.0, 836.0, 392.0, 134.0, 392.0 ], "score": 1.0, "text": "and a pixel in its support window, and the aggregated cost is", "bbox": [ 48, 129, 300, 141 ] }, { "category_id": 15, "poly": [ 134.0, 394.0, 836.0, 394.0, 836.0, 426.0, 134.0, 426.0 ], "score": 0.99, "text": "computed as a weighted average of the pixel-wise costs within", "bbox": [ 48, 141, 300, 153 ] }, { "category_id": 15, "poly": [ 127.0, 422.0, 839.0, 424.0, 838.0, 463.0, 127.0, 461.0 ], "score": 0.98, "text": " the considered support window. The edge-preserving nature", "bbox": [ 45, 151, 301, 166 ] }, { "category_id": 15, "poly": [ 129.0, 456.0, 838.0, 454.0, 838.0, 493.0, 129.0, 495.0 ], "score": 0.99, "text": " and matching accuracy of adaptive support weights have made", "bbox": [ 46, 164, 301, 177 ] }, { "category_id": 15, "poly": [ 132.0, 490.0, 841.0, 490.0, 841.0, 529.0, 132.0, 529.0 ], "score": 0.99, "text": "them one of the most popular choices for cost aggregation in", "bbox": [ 47, 176, 302, 190 ] }, { "category_id": 15, "poly": [ 132.0, 527.0, 797.0, 527.0, 797.0, 559.0, 132.0, 559.0 ], "score": 0.97, "text": "recently proposed stereo matching algorithms [3], [5]-[8].", "bbox": [ 47, 189, 286, 201 ] }, { "category_id": 15, "poly": [ 157.0, 958.0, 836.0, 958.0, 836.0, 988.0, 157.0, 988.0 ], "score": 0.99, "text": "It has been demonstrated that the performance of stereo", "bbox": [ 56, 344, 300, 355 ] }, { "category_id": 15, "poly": [ 132.0, 990.0, 838.0, 990.0, 838.0, 1022.0, 132.0, 1022.0 ], "score": 0.99, "text": "algorithms designed to match a single pair of images can", "bbox": [ 47, 356, 301, 367 ] }, { "category_id": 15, "poly": [ 132.0, 1024.0, 836.0, 1024.0, 836.0, 1056.0, 132.0, 1056.0 ], "score": 0.99, "text": "be adapted to take advantage of the temporal dependencies", "bbox": [ 47, 368, 300, 380 ] }, { "category_id": 15, "poly": [ 129.0, 1054.0, 838.0, 1054.0, 838.0, 1093.0, 129.0, 1093.0 ], "score": 0.97, "text": "available in stereo video sequences. Early proposed solutions", "bbox": [ 46, 379, 301, 393 ] }, { "category_id": 15, "poly": [ 132.0, 1091.0, 836.0, 1091.0, 836.0, 1123.0, 132.0, 1123.0 ], "score": 0.99, "text": "to temporal stereo matching attempted to average matching", "bbox": [ 47, 392, 300, 404 ] }, { "category_id": 15, "poly": [ 134.0, 1123.0, 836.0, 1123.0, 836.0, 1155.0, 134.0, 1155.0 ], "score": 0.99, "text": "costs across subsequent frames of a video sequence [13],", "bbox": [ 48, 404, 300, 415 ] }, { "category_id": 15, "poly": [ 129.0, 1153.0, 841.0, 1150.0, 841.0, 1189.0, 129.0, 1192.0 ], "score": 0.98, "text": "[14]. Attempts have been made to integrate estimation of", "bbox": [ 46, 415, 302, 428 ] }, { "category_id": 15, "poly": [ 134.0, 1192.0, 838.0, 1192.0, 838.0, 1224.0, 134.0, 1224.0 ], "score": 0.99, "text": "motion fields (optical flow) into temporal stereo matching. The", "bbox": [ 48, 429, 301, 440 ] }, { "category_id": 15, "poly": [ 132.0, 1224.0, 838.0, 1224.0, 838.0, 1256.0, 132.0, 1256.0 ], "score": 0.99, "text": "methods of [15] and [16] perform smoothing of disparities", "bbox": [ 47, 440, 301, 452 ] }, { "category_id": 15, "poly": [ 129.0, 1254.0, 841.0, 1254.0, 841.0, 1292.0, 129.0, 1292.0 ], "score": 0.99, "text": " along motion vectors recovered from the video sequence. The", "bbox": [ 46, 451, 302, 465 ] }, { "category_id": 15, "poly": [ 132.0, 1290.0, 838.0, 1290.0, 838.0, 1322.0, 132.0, 1322.0 ], "score": 0.99, "text": "estimation of the motion field, however, prevents real-time", "bbox": [ 47, 464, 301, 475 ] }, { "category_id": 15, "poly": [ 132.0, 1325.0, 838.0, 1325.0, 838.0, 1354.0, 132.0, 1354.0 ], "score": 0.99, "text": "implementation, since state-of-the-art optical flow algorithms", "bbox": [ 47, 477, 301, 487 ] }, { "category_id": 15, "poly": [ 129.0, 1354.0, 841.0, 1354.0, 841.0, 1393.0, 129.0, 1393.0 ], "score": 0.99, "text": " do not, in general, approach real-time frame rates. In a related", "bbox": [ 46, 487, 302, 501 ] }, { "category_id": 15, "poly": [ 129.0, 1386.0, 841.0, 1384.0, 841.0, 1423.0, 129.0, 1425.0 ], "score": 0.99, "text": "approach, Sizintsev and Wildes [17], [18] used steerable", "bbox": [ 46, 498, 302, 512 ] }, { "category_id": 15, "poly": [ 134.0, 1423.0, 836.0, 1423.0, 836.0, 1455.0, 134.0, 1455.0 ], "score": 0.99, "text": "filters to obtain descriptors characterizing motion of image", "bbox": [ 48, 512, 300, 523 ] }, { "category_id": 15, "poly": [ 134.0, 1455.0, 836.0, 1455.0, 836.0, 1487.0, 134.0, 1487.0 ], "score": 0.99, "text": "features in both space and time. Unlike traditional algorithms,", "bbox": [ 48, 523, 300, 535 ] }, { "category_id": 15, "poly": [ 132.0, 1490.0, 838.0, 1490.0, 838.0, 1522.0, 132.0, 1522.0 ], "score": 0.98, "text": "their method performs matching on spatio-temporal motion", "bbox": [ 47, 536, 301, 547 ] }, { "category_id": 15, "poly": [ 129.0, 1519.0, 841.0, 1517.0, 841.0, 1556.0, 129.0, 1558.0 ], "score": 0.99, "text": " descriptors, rather than on pure pixel intensity values, which", "bbox": [ 46, 546, 302, 560 ] }, { "category_id": 15, "poly": [ 132.0, 1554.0, 841.0, 1554.0, 841.0, 1593.0, 132.0, 1593.0 ], "score": 0.99, "text": "leads to improved temporal coherence of disparity maps at the", "bbox": [ 47, 559, 302, 573 ] }, { "category_id": 15, "poly": [ 132.0, 1586.0, 698.0, 1586.0, 698.0, 1618.0, 132.0, 1618.0 ], "score": 0.99, "text": "cost of reduced accuracy at depth discontinuities.", "bbox": [ 47, 570, 251, 582 ] }, { "category_id": 15, "poly": [ 159.0, 559.0, 838.0, 559.0, 838.0, 591.0, 159.0, 591.0 ], "score": 0.99, "text": "Recently, Rheman et al. [9], [10] have revisited the cost", "bbox": [ 57, 201, 301, 212 ] }, { "category_id": 15, "poly": [ 132.0, 594.0, 838.0, 589.0, 839.0, 621.0, 132.0, 626.0 ], "score": 1.0, "text": "aggregation step of stereo algorithms, and demonstrated that", "bbox": [ 47, 213, 302, 223 ] }, { "category_id": 15, "poly": [ 132.0, 626.0, 838.0, 626.0, 838.0, 658.0, 132.0, 658.0 ], "score": 0.99, "text": "cost aggregation can be performed by filtering of subsequent", "bbox": [ 47, 225, 301, 236 ] }, { "category_id": 15, "poly": [ 134.0, 660.0, 834.0, 660.0, 834.0, 692.0, 134.0, 692.0 ], "score": 1.0, "text": "layers of the initially computed matching cost volume. In par-", "bbox": [ 48, 237, 300, 249 ] }, { "category_id": 15, "poly": [ 132.0, 692.0, 836.0, 692.0, 836.0, 724.0, 132.0, 724.0 ], "score": 0.99, "text": "ticular, the edge-aware image filters, such as the bilateral filter", "bbox": [ 47, 249, 300, 260 ] }, { "category_id": 15, "poly": [ 127.0, 719.0, 839.0, 724.0, 838.0, 761.0, 127.0, 756.0 ], "score": 0.99, "text": " of Tomasi and Manducci [11] or the guided filter of He [12],", "bbox": [ 45, 258, 301, 273 ] }, { "category_id": 15, "poly": [ 132.0, 759.0, 838.0, 759.0, 838.0, 791.0, 132.0, 791.0 ], "score": 0.98, "text": "have been rendered useful for the problem of matching cost", "bbox": [ 47, 273, 301, 284 ] }, { "category_id": 15, "poly": [ 132.0, 793.0, 838.0, 791.0, 838.0, 823.0, 132.0, 825.0 ], "score": 0.99, "text": "aggregation, enabling stereo algorithms to correctly recover", "bbox": [ 47, 285, 301, 296 ] }, { "category_id": 15, "poly": [ 134.0, 825.0, 838.0, 825.0, 838.0, 857.0, 134.0, 857.0 ], "score": 0.98, "text": "disparities along object boundaries. In fact, Yoon and Kweon's", "bbox": [ 48, 297, 301, 308 ] }, { "category_id": 15, "poly": [ 134.0, 859.0, 838.0, 859.0, 838.0, 891.0, 134.0, 891.0 ], "score": 1.0, "text": "adaptive support-weight cost aggregation scheme is equivalent", "bbox": [ 48, 309, 301, 320 ] }, { "category_id": 15, "poly": [ 132.0, 891.0, 838.0, 891.0, 838.0, 924.0, 132.0, 924.0 ], "score": 0.98, "text": "to the application of the so-called joint bilateral filter to the", "bbox": [ 47, 320, 301, 332 ] }, { "category_id": 15, "poly": [ 134.0, 924.0, 547.0, 924.0, 547.0, 956.0, 134.0, 956.0 ], "score": 1.0, "text": "layers of the matching cost volume.", "bbox": [ 48, 332, 196, 344 ] }, { "category_id": 15, "poly": [ 889.0, 422.0, 1568.0, 424.0, 1568.0, 456.0, 889.0, 454.0 ], "score": 0.98, "text": "The proposed temporal stereo matching algorithm is an", "bbox": [ 320, 151, 564, 164 ] }, { "category_id": 15, "poly": [ 862.0, 456.0, 1571.0, 456.0, 1571.0, 495.0, 862.0, 495.0 ], "score": 1.0, "text": "extension of the real-time iterative adaptive support-weight", "bbox": [ 310, 164, 565, 178 ] }, { "category_id": 15, "poly": [ 864.0, 490.0, 1568.0, 490.0, 1568.0, 522.0, 864.0, 522.0 ], "score": 0.99, "text": "algorithm described in [3]. In addition to real-time two-", "bbox": [ 311, 176, 564, 187 ] }, { "category_id": 15, "poly": [ 864.0, 525.0, 1566.0, 525.0, 1566.0, 557.0, 864.0, 557.0 ], "score": 1.0, "text": "pass aggregation of the cost values in the spatial domain,", "bbox": [ 311, 189, 563, 200 ] }, { "category_id": 15, "poly": [ 864.0, 557.0, 1568.0, 557.0, 1568.0, 589.0, 864.0, 589.0 ], "score": 0.99, "text": "the proposed algorithm enhances stereo matching on video", "bbox": [ 311, 200, 564, 212 ] }, { "category_id": 15, "poly": [ 866.0, 594.0, 1566.0, 594.0, 1566.0, 626.0, 866.0, 626.0 ], "score": 0.97, "text": "sequences by aggregating costs along the time dimension.", "bbox": [ 311, 213, 563, 225 ] }, { "category_id": 15, "poly": [ 864.0, 626.0, 1568.0, 626.0, 1568.0, 658.0, 864.0, 658.0 ], "score": 1.0, "text": "The operation of the algorithm has been divided into four", "bbox": [ 311, 225, 564, 236 ] }, { "category_id": 15, "poly": [ 866.0, 660.0, 1568.0, 660.0, 1568.0, 692.0, 866.0, 692.0 ], "score": 0.99, "text": "stages: 1) two-pass spatial cost aggregation, 2) temporal cost", "bbox": [ 311, 237, 564, 249 ] }, { "category_id": 15, "poly": [ 862.0, 688.0, 1568.0, 685.0, 1568.0, 724.0, 862.0, 727.0 ], "score": 1.0, "text": "aggregation, 3) disparity selection and confidence assessment,", "bbox": [ 310, 247, 564, 260 ] }, { "category_id": 15, "poly": [ 866.0, 724.0, 1568.0, 724.0, 1568.0, 756.0, 866.0, 756.0 ], "score": 1.0, "text": "and 4) iterative disparity refinement. In the following, each of", "bbox": [ 311, 260, 564, 272 ] }, { "category_id": 15, "poly": [ 864.0, 759.0, 1254.0, 759.0, 1254.0, 791.0, 864.0, 791.0 ], "score": 1.0, "text": "these stages is described in detail.", "bbox": [ 311, 273, 451, 284 ] }, { "category_id": 15, "poly": [ 860.0, 1265.0, 1194.0, 1270.0, 1194.0, 1306.0, 859.0, 1301.0 ], "score": 0.99, "text": " color similarity, respectively.", "bbox": [ 309, 455, 429, 470 ] }, { "category_id": 15, "poly": [ 1433.0, 1169.0, 1566.0, 1169.0, 1566.0, 1201.0, 1433.0, 1201.0 ], "score": 0.98, "text": "is the color", "bbox": [ 515, 420, 563, 432 ] }, { "category_id": 15, "poly": [ 864.0, 1169.0, 938.0, 1169.0, 938.0, 1201.0, 864.0, 1201.0 ], "score": 1.0, "text": "where", "bbox": [ 311, 420, 337, 432 ] }, { "category_id": 15, "poly": [ 1040.0, 1169.0, 1334.0, 1169.0, 1334.0, 1201.0, 1040.0, 1201.0 ], "score": 0.98, "text": "is the geometric distance,", "bbox": [ 374, 420, 480, 432 ] }, { "category_id": 15, "poly": [ 1517.0, 1196.0, 1566.0, 1201.0, 1566.0, 1240.0, 1517.0, 1235.0 ], "score": 1.0, "text": "and", "bbox": [ 546, 430, 563, 446 ] }, { "category_id": 15, "poly": [ 862.0, 1196.0, 1158.0, 1201.0, 1158.0, 1240.0, 861.0, 1235.0 ], "score": 1.0, "text": "difference between pixels", "bbox": [ 310, 430, 416, 446 ] }, { "category_id": 15, "poly": [ 894.0, 1233.0, 1566.0, 1231.0, 1566.0, 1270.0, 894.0, 1272.0 ], "score": 0.97, "text": "regulate the strength of grouping by geometric distance and", "bbox": [ 321, 443, 563, 457 ] }, { "category_id": 15, "poly": [ 1179.0, 1196.0, 1229.0, 1201.0, 1229.0, 1240.0, 1179.0, 1235.0 ], "score": 1.0, "text": "and", "bbox": [ 424, 430, 442, 446 ] }, { "category_id": 15, "poly": [ 1248.0, 1196.0, 1484.0, 1201.0, 1484.0, 1240.0, 1248.0, 1235.0 ], "score": 0.99, "text": ", and the coefficients", "bbox": [ 449, 430, 534, 446 ] }, { "category_id": 15, "poly": [ 887.0, 848.0, 1568.0, 850.0, 1568.0, 889.0, 887.0, 887.0 ], "score": 0.99, "text": " Humans group shapes by observing the geometric distance", "bbox": [ 319, 305, 564, 320 ] }, { "category_id": 15, "poly": [ 859.0, 885.0, 1568.0, 882.0, 1568.0, 921.0, 859.0, 924.0 ], "score": 0.98, "text": " and color similarity of points in space. To mimic this vi-", "bbox": [ 309, 318, 564, 331 ] }, { "category_id": 15, "poly": [ 864.0, 921.0, 1568.0, 921.0, 1568.0, 953.0, 864.0, 953.0 ], "score": 0.99, "text": "sual grouping, the adaptive support-weight stereo matching", "bbox": [ 311, 331, 564, 343 ] }, { "category_id": 15, "poly": [ 864.0, 1054.0, 899.0, 1054.0, 899.0, 1084.0, 864.0, 1084.0 ], "score": 1.0, "text": "by", "bbox": [ 311, 379, 323, 390 ] }, { "category_id": 15, "poly": [ 866.0, 956.0, 1350.0, 956.0, 1350.0, 988.0, 866.0, 988.0 ], "score": 0.98, "text": "algorithm [4] considers a support window", "bbox": [ 311, 344, 486, 355 ] }, { "category_id": 15, "poly": [ 1389.0, 956.0, 1566.0, 956.0, 1566.0, 988.0, 1389.0, 988.0 ], "score": 0.98, "text": " centered at the", "bbox": [ 500, 344, 563, 355 ] }, { "category_id": 15, "poly": [ 952.0, 1022.0, 1370.0, 1022.0, 1370.0, 1054.0, 952.0, 1054.0 ], "score": 0.98, "text": ". The support weight relating pixels", "bbox": [ 342, 367, 493, 379 ] }, { "category_id": 15, "poly": [ 1392.0, 1022.0, 1446.0, 1022.0, 1446.0, 1054.0, 1392.0, 1054.0 ], "score": 1.0, "text": "and", "bbox": [ 501, 367, 520, 379 ] }, { "category_id": 15, "poly": [ 1466.0, 1022.0, 1566.0, 1022.0, 1566.0, 1054.0, 1466.0, 1054.0 ], "score": 0.98, "text": "is given", "bbox": [ 527, 367, 563, 379 ] }, { "category_id": 15, "poly": [ 866.0, 990.0, 1049.0, 990.0, 1049.0, 1022.0, 866.0, 1022.0 ], "score": 1.0, "text": "pixel of interest", "bbox": [ 311, 356, 377, 367 ] }, { "category_id": 15, "poly": [ 1069.0, 990.0, 1566.0, 990.0, 1566.0, 1022.0, 1069.0, 1022.0 ], "score": 1.0, "text": ", and assigns a support weight to each pixel", "bbox": [ 384, 356, 563, 367 ] }, { "category_id": 15, "poly": [ 862.0, 1948.0, 1568.0, 1950.0, 1568.0, 1989.0, 861.0, 1987.0 ], "score": 0.98, "text": "vides additional robustness to outliers. Rather than evaluating", "bbox": [ 310, 701, 564, 716 ] }, { "category_id": 15, "poly": [ 864.0, 1989.0, 1566.0, 1989.0, 1566.0, 2021.0, 864.0, 2021.0 ], "score": 0.98, "text": "Equation (2) directly, real-time algorithms often approximate", "bbox": [ 311, 716, 563, 727 ] }, { "category_id": 15, "poly": [ 862.0, 1920.0, 1406.0, 1920.0, 1406.0, 1952.0, 862.0, 1952.0 ], "score": 0.99, "text": "This limits each of their magnitudes to at most", "bbox": [ 310, 691, 506, 702 ] }, { "category_id": 15, "poly": [ 1426.0, 1920.0, 1561.0, 1920.0, 1561.0, 1952.0, 1426.0, 1952.0 ], "score": 0.96, "text": ",whichpro-", "bbox": [ 513, 691, 561, 702 ] }, { "category_id": 15, "poly": [ 859.0, 1331.0, 1571.0, 1334.0, 1571.0, 1373.0, 859.0, 1370.0 ], "score": 0.98, "text": " iterative adaptive support-weight algorithm evaluates matching", "bbox": [ 309, 479, 565, 494 ] }, { "category_id": 15, "poly": [ 859.0, 1464.0, 912.0, 1467.0, 912.0, 1506.0, 859.0, 1503.0 ], "score": 1.0, "text": "and", "bbox": [ 309, 527, 328, 542 ] }, { "category_id": 15, "poly": [ 950.0, 1464.0, 1474.0, 1467.0, 1474.0, 1506.0, 950.0, 1503.0 ], "score": 1.0, "text": ", the initial matching cost is aggregated using", "bbox": [ 342, 527, 530, 542 ] }, { "category_id": 15, "poly": [ 1442.0, 1370.0, 1530.0, 1370.0, 1530.0, 1402.0, 1442.0, 1402.0 ], "score": 0.98, "text": ", where", "bbox": [ 519, 493, 550, 504 ] }, { "category_id": 15, "poly": [ 1197.0, 1437.0, 1527.0, 1437.0, 1527.0, 1469.0, 1197.0, 1469.0 ], "score": 0.97, "text": ", and their support windows", "bbox": [ 430, 517, 549, 528 ] }, { "category_id": 15, "poly": [ 866.0, 1402.0, 1539.0, 1402.0, 1539.0, 1435.0, 866.0, 1435.0 ], "score": 1.0, "text": "denotes a set of matching candidates associated with pixel", "bbox": [ 311, 504, 554, 516 ] }, { "category_id": 15, "poly": [ 864.0, 1437.0, 1100.0, 1437.0, 1100.0, 1469.0, 864.0, 1469.0 ], "score": 0.97, "text": "For a pair of pixels", "bbox": [ 311, 517, 396, 528 ] }, { "category_id": 15, "poly": [ 1122.0, 1437.0, 1176.0, 1437.0, 1176.0, 1469.0, 1122.0, 1469.0 ], "score": 0.94, "text": " and", "bbox": [ 403, 517, 423, 528 ] }, { "category_id": 15, "poly": [ 887.0, 1299.0, 1388.0, 1304.0, 1388.0, 1336.0, 887.0, 1331.0 ], "score": 0.96, "text": " To identify a match for the pixel of interest", "bbox": [ 319, 467, 499, 480 ] }, { "category_id": 15, "poly": [ 1408.0, 1299.0, 1568.0, 1304.0, 1568.0, 1336.0, 1408.0, 1331.0 ], "score": 1.0, "text": ", the real-time", "bbox": [ 506, 467, 564, 480 ] }, { "category_id": 15, "poly": [ 864.0, 1370.0, 1028.0, 1370.0, 1028.0, 1402.0, 864.0, 1402.0 ], "score": 1.0, "text": "costs between", "bbox": [ 311, 493, 370, 504 ] }, { "category_id": 15, "poly": [ 1049.0, 1370.0, 1361.0, 1370.0, 1361.0, 1402.0, 1049.0, 1402.0 ], "score": 0.99, "text": " and every match candidate", "bbox": [ 377, 493, 489, 504 ] }, { "category_id": 15, "poly": [ 160.0, 1618.0, 836.0, 1623.0, 836.0, 1655.0, 159.0, 1650.0 ], "score": 0.99, "text": "Most recently, local stereo algorithms based on edge-aware", "bbox": [ 57, 582, 300, 595 ] }, { "category_id": 15, "poly": [ 127.0, 1650.0, 841.0, 1652.0, 841.0, 1691.0, 127.0, 1689.0 ], "score": 0.97, "text": " filters were extended to incorporate temporal evidence into", "bbox": [ 45, 594, 302, 608 ] }, { "category_id": 15, "poly": [ 132.0, 1687.0, 836.0, 1687.0, 836.0, 1719.0, 132.0, 1719.0 ], "score": 0.97, "text": "the matching process. The method of Richardt et al. [19]", "bbox": [ 47, 607, 300, 618 ] }, { "category_id": 15, "poly": [ 134.0, 1723.0, 838.0, 1723.0, 838.0, 1753.0, 134.0, 1753.0 ], "score": 0.99, "text": "employs a variant of the bilateral grid [20] implemented on", "bbox": [ 48, 620, 301, 631 ] }, { "category_id": 15, "poly": [ 134.0, 1755.0, 838.0, 1755.0, 838.0, 1788.0, 134.0, 1788.0 ], "score": 0.99, "text": "graphics hardware, which accelerates cost aggregation and", "bbox": [ 48, 631, 301, 643 ] }, { "category_id": 15, "poly": [ 134.0, 1788.0, 838.0, 1788.0, 838.0, 1820.0, 134.0, 1820.0 ], "score": 1.0, "text": "allows for weighted propagation of pixel dissimilarity metrics", "bbox": [ 48, 643, 301, 655 ] }, { "category_id": 15, "poly": [ 132.0, 1822.0, 838.0, 1822.0, 838.0, 1854.0, 132.0, 1854.0 ], "score": 0.99, "text": "from previous frames to the current one. Although this method", "bbox": [ 47, 655, 301, 667 ] }, { "category_id": 15, "poly": [ 129.0, 1856.0, 838.0, 1856.0, 838.0, 1888.0, 129.0, 1888.0 ], "score": 1.0, "text": " outperforms the baseline frame-to-frame approach, the amount", "bbox": [ 46, 668, 301, 679 ] }, { "category_id": 15, "poly": [ 132.0, 1888.0, 838.0, 1888.0, 838.0, 1920.0, 132.0, 1920.0 ], "score": 0.97, "text": "of hardware memory necessary to construct the bilateral grid", "bbox": [ 47, 679, 301, 691 ] }, { "category_id": 15, "poly": [ 127.0, 1916.0, 841.0, 1918.0, 841.0, 1957.0, 127.0, 1955.0 ], "score": 0.99, "text": "limits its application to single-channel, i.e., grayscale images ", "bbox": [ 45, 689, 302, 704 ] }, { "category_id": 15, "poly": [ 132.0, 1955.0, 838.0, 1955.0, 838.0, 1985.0, 132.0, 1985.0 ], "score": 0.99, "text": "only. Hosni et al. [10], on the other hand, reformulated kernels", "bbox": [ 47, 703, 301, 714 ] }, { "category_id": 15, "poly": [ 132.0, 1989.0, 838.0, 1989.0, 838.0, 2021.0, 132.0, 2021.0 ], "score": 0.99, "text": "of the guided image filter to operate on both spatial and", "bbox": [ 47, 716, 301, 727 ] }, { "category_id": 15, "poly": [ 859.0, 809.0, 1307.0, 809.0, 1307.0, 848.0, 859.0, 848.0 ], "score": 0.99, "text": "A. Two-Pass Spatial Cost Aggregation", "bbox": [ 309, 291, 470, 305 ] }, { "category_id": 15, "poly": [ 1129.0, 376.0, 1300.0, 376.0, 1300.0, 417.0, 1129.0, 417.0 ], "score": 0.94, "text": "III. METHOD", "bbox": [ 406, 135, 468, 150 ] } ], "page_info": { "page_no": 1, "height": 2200, "width": 1700 } }, { "layout_dets": [ { "category_id": 1, "poly": [ 865.5088500976562, 856.5537109375, 1567.692626953125, 856.5537109375, 1567.692626953125, 1420.9698486328125, 865.5088500976562, 1420.9698486328125 ], "score": 0.9999963045120239, "bbox": [ 311, 308, 564, 511 ] }, { "category_id": 8, "poly": [ 281.1294860839844, 1001.0513916015625, 689.37451171875, 1001.0513916015625, 689.37451171875, 1075.8765869140625, 281.1294860839844, 1075.8765869140625 ], "score": 0.9999961256980896, "bbox": [ 101, 360, 248, 387 ] }, { "category_id": 1, "poly": [ 133.53353881835938, 158.6427459716797, 836.7297973632812, 158.6427459716797, 836.7297973632812, 390.48828125, 133.53353881835938, 390.48828125 ], "score": 0.9999960660934448, "bbox": [ 48, 57, 301, 140 ] }, { "category_id": 8, "poly": [ 145.77777099609375, 1839.6416015625, 803.4192504882812, 1839.6416015625, 803.4192504882812, 1993.239013671875, 145.77777099609375, 1993.239013671875 ], "score": 0.9999958872795105, "bbox": [ 52, 662, 289, 717 ] }, { "category_id": 1, "poly": [ 864.9884643554688, 1420.8831787109375, 1567.3118896484375, 1420.8831787109375, 1567.3118896484375, 2023.257080078125, 864.9884643554688, 2023.257080078125 ], "score": 0.9999951124191284, "bbox": [ 311, 511, 564, 728 ] }, { "category_id": 9, "poly": [ 1529.267333984375, 388.6717834472656, 1565.1744384765625, 388.6717834472656, 1565.1744384765625, 416.4899597167969, 1529.267333984375, 416.4899597167969 ], "score": 0.9999918937683105, "bbox": [ 550, 139, 563, 149 ] }, { "category_id": 9, "poly": [ 800.3933715820312, 1551.524169921875, 833.2618408203125, 1551.524169921875, 833.2618408203125, 1582.073486328125, 800.3933715820312, 1582.073486328125 ], "score": 0.9999911189079285, "bbox": [ 288, 558, 299, 569 ] }, { "category_id": 1, "poly": [ 864.3720092773438, 200.97483825683594, 1565.6871337890625, 200.97483825683594, 1565.6871337890625, 365.6230163574219, 864.3720092773438, 365.6230163574219 ], "score": 0.9999903440475464, "bbox": [ 311, 72, 563, 131 ] }, { "category_id": 1, "poly": [ 134.87628173828125, 1369.5762939453125, 835.0336303710938, 1369.5762939453125, 835.0336303710938, 1533.884765625, 134.87628173828125, 1533.884765625 ], "score": 0.9999880790710449, "bbox": [ 48, 493, 300, 552 ] }, { "category_id": 1, "poly": [ 134.59988403320312, 444.5299377441406, 836.5606079101562, 444.5299377441406, 836.5606079101562, 709.0791015625, 134.59988403320312, 709.0791015625 ], "score": 0.999987006187439, "bbox": [ 48, 160, 301, 255 ] }, { "category_id": 1, "poly": [ 134.15472412109375, 1084.4288330078125, 836.2360229492188, 1084.4288330078125, 836.2360229492188, 1314.6600341796875, 134.15472412109375, 1314.6600341796875 ], "score": 0.9999866485595703, "bbox": [ 48, 390, 301, 473 ] }, { "category_id": 9, "poly": [ 800.6007690429688, 1023.1047973632812, 833.2154541015625, 1023.1047973632812, 833.2154541015625, 1055.7227783203125, 800.6007690429688, 1055.7227783203125 ], "score": 0.9999839663505554, "bbox": [ 288, 368, 299, 380 ] }, { "category_id": 8, "poly": [ 948.4016723632812, 372.03607177734375, 1486.11279296875, 372.03607177734375, 1486.11279296875, 449.3696594238281, 948.4016723632812, 449.3696594238281 ], "score": 0.9999831914901733, "bbox": [ 341, 133, 535, 161 ] }, { "category_id": 8, "poly": [ 145.31065368652344, 714.4036254882812, 820.3599853515625, 714.4036254882812, 820.3599853515625, 791.855712890625, 145.31065368652344, 791.855712890625 ], "score": 0.9999772906303406, "bbox": [ 52, 257, 295, 285 ] }, { "category_id": 1, "poly": [ 863.8760986328125, 599.6033325195312, 1566.84619140625, 599.6033325195312, 1566.84619140625, 797.44189453125, 863.8760986328125, 797.44189453125 ], "score": 0.999976396560669, "bbox": [ 310, 215, 564, 287 ] }, { "category_id": 1, "poly": [ 864.925537109375, 464.9669189453125, 1565.212158203125, 464.9669189453125, 1565.212158203125, 529.045654296875, 864.925537109375, 529.045654296875 ], "score": 0.999973475933075, "bbox": [ 311, 167, 563, 190 ] }, { "category_id": 1, "poly": [ 133.88735961914062, 797.7457885742188, 835.5986328125, 797.7457885742188, 835.5986328125, 994.4456176757812, 133.88735961914062, 994.4456176757812 ], "score": 0.9999661445617676, "bbox": [ 48, 287, 300, 358 ] }, { "category_id": 1, "poly": [ 134.8787841796875, 1615.116455078125, 835.4554443359375, 1615.116455078125, 835.4554443359375, 1815.4564208984375, 134.8787841796875, 1815.4564208984375 ], "score": 0.9999580383300781, "bbox": [ 48, 581, 300, 653 ] }, { "category_id": 9, "poly": [ 1530.1783447265625, 550.1576538085938, 1564.607177734375, 550.1576538085938, 1564.607177734375, 578.6950073242188, 1530.1783447265625, 578.6950073242188 ], "score": 0.9999532103538513, "bbox": [ 550, 198, 563, 208 ] }, { "category_id": 9, "poly": [ 801.0740966796875, 738.4259643554688, 834.7449340820312, 738.4259643554688, 834.7449340820312, 770.4969482421875, 801.0740966796875, 770.4969482421875 ], "score": 0.9996598958969116, "bbox": [ 288, 265, 300, 277 ] }, { "category_id": 0, "poly": [ 1134.302490234375, 815.6021728515625, 1295.3885498046875, 815.6021728515625, 1295.3885498046875, 844.6544799804688, 1134.302490234375, 844.6544799804688 ], "score": 0.9994980096817017, "bbox": [ 408, 293, 466, 304 ] }, { "category_id": 9, "poly": [ 798.6090698242188, 1986.7332763671875, 834.5460205078125, 1986.7332763671875, 834.5460205078125, 2017.6595458984375, 798.6090698242188, 2017.6595458984375 ], "score": 0.9992558360099792, "bbox": [ 287, 715, 300, 726 ] }, { "category_id": 0, "poly": [ 135.0093994140625, 406.12335205078125, 475.6328125, 406.12335205078125, 475.6328125, 437.4545593261719, 135.0093994140625, 437.4545593261719 ], "score": 0.9990860819816589, "bbox": [ 48, 146, 171, 157 ] }, { "category_id": 8, "poly": [ 1029.3924560546875, 541.857177734375, 1400.174072265625, 541.857177734375, 1400.174072265625, 585.1640625, 1029.3924560546875, 585.1640625 ], "score": 0.9979717135429382, "bbox": [ 370, 195, 504, 210 ] }, { "category_id": 0, "poly": [ 133.26077270507812, 1330.139892578125, 713.5426635742188, 1330.139892578125, 713.5426635742188, 1363.1341552734375, 133.26077270507812, 1363.1341552734375 ], "score": 0.9967154860496521, "bbox": [ 47, 478, 256, 490 ] }, { "category_id": 8, "poly": [ 338.6681823730469, 1547.7218017578125, 626.6519775390625, 1547.7218017578125, 626.6519775390625, 1604.587646484375, 338.6681823730469, 1604.587646484375 ], "score": 0.9945433139801025, "bbox": [ 121, 557, 225, 577 ] }, { "category_id": 1, "poly": [ 864.5469970703125, 160.16702270507812, 1251.313720703125, 160.16702270507812, 1251.313720703125, 190.15760803222656, 864.5469970703125, 190.15760803222656 ], "score": 0.9902143478393555, "bbox": [ 311, 57, 450, 68 ] }, { "category_id": 13, "poly": [ 550, 577, 648, 577, 648, 612, 550, 612 ], "score": 0.95, "latex": "C_{a}(p,\\bar{p})", "bbox": [ 198, 207, 233, 220 ] }, { "category_id": 13, "poly": [ 183, 1780, 304, 1780, 304, 1813, 183, 1813 ], "score": 0.95, "latex": "p^{\\prime}=m(\\bar{p})", "bbox": [ 65, 640, 109, 652 ] }, { "category_id": 14, "poly": [ 279, 1000, 687, 1000, 687, 1078, 279, 1078 ], "score": 0.95, "latex": "w_{t}(p,p_{t-1})=\\exp\\bigg({-\\frac{\\Delta_{c}(p,p_{t-1})}{\\gamma_{t}}}\\bigg),", "bbox": [ 100, 360, 247, 388 ] }, { "category_id": 14, "poly": [ 147, 1843, 820, 1843, 820, 1992, 147, 1992 ], "score": 0.94, "latex": "F_{p}=\\left\\{\\begin{array}{l l}{\\underset{\\bar{p}\\in S_{p}\\setminus m(p)}{\\mathrm{min}}\\,C(p,\\bar{p})-\\underset{\\bar{p}\\in S_{p}}{\\mathrm{min}}\\,C(p,\\bar{p})}\\\\ {\\underset{\\bar{p}\\in S_{p}\\setminus m(p)}{\\mathrm{min}}\\,C(p,\\bar{p})}&{|d_{p}-d_{p^{\\prime}}|\\leq1}\\\\ {0,}&{\\mathrm{otherwise}}\\end{array}\\right..", "bbox": [ 52, 663, 295, 717 ] }, { "category_id": 14, "poly": [ 340, 1546, 628, 1546, 628, 1608, 340, 1608 ], "score": 0.93, "latex": "m(p)=\\underset{\\bar{p}\\in S_{p}}{\\mathrm{argmin}}\\,C(p,\\bar{p})\\,.", "bbox": [ 122, 556, 226, 578 ] }, { "category_id": 13, "poly": [ 321, 830, 443, 830, 443, 864, 321, 864 ], "score": 0.93, "latex": "w_{t}(p,p_{t-1})", "bbox": [ 115, 298, 159, 311 ] }, { "category_id": 13, "poly": [ 581, 1713, 694, 1713, 694, 1747, 581, 1747 ], "score": 0.93, "latex": "{\\bar{p}}=m(p)", "bbox": [ 209, 616, 249, 628 ] }, { "category_id": 14, "poly": [ 947, 373, 1478, 373, 1478, 454, 947, 454 ], "score": 0.93, "latex": "\\Lambda^{i}(p,\\bar{p})=\\alpha\\times\\sum_{q\\in\\Omega_{p}}w(p,q)F_{q}^{i-1}\\left|D_{q}^{i-1}-d_{p}\\right|\\,,", "bbox": [ 340, 134, 532, 163 ] }, { "category_id": 13, "poly": [ 426, 445, 512, 445, 512, 479, 426, 479 ], "score": 0.93, "latex": "C(p,{\\bar{p}})", "bbox": [ 153, 160, 184, 172 ] }, { "category_id": 13, "poly": [ 337, 356, 414, 356, 414, 391, 337, 391 ], "score": 0.93, "latex": "\\mathcal{O}(\\omega^{2})", "bbox": [ 121, 128, 149, 140 ] }, { "category_id": 13, "poly": [ 1341, 730, 1565, 730, 1565, 765, 1341, 765 ], "score": 0.92, "latex": "C_{a}(p,\\bar{p})\\gets C(p,\\bar{p})", "bbox": [ 482, 262, 563, 275 ] }, { "category_id": 13, "poly": [ 629, 1436, 691, 1436, 691, 1470, 629, 1470 ], "score": 0.92, "latex": "m(p)", "bbox": [ 226, 516, 248, 529 ] }, { "category_id": 13, "poly": [ 277, 1469, 361, 1469, 361, 1504, 277, 1504 ], "score": 0.92, "latex": "\\bar{p}\\in S_{p}", "bbox": [ 99, 528, 129, 541 ] }, { "category_id": 14, "poly": [ 1030, 541, 1398, 541, 1398, 582, 1030, 582 ], "score": 0.92, "latex": "C^{i}(p,\\bar{p})=C^{0}(p,\\bar{p})+{\\Lambda^{i}}(p,\\bar{p})\\,,", "bbox": [ 370, 194, 503, 209 ] }, { "category_id": 13, "poly": [ 453, 356, 518, 356, 518, 391, 453, 391 ], "score": 0.91, "latex": "\\mathcal{O}(\\omega)", "bbox": [ 163, 128, 186, 140 ] }, { "category_id": 14, "poly": [ 146, 714, 787, 714, 787, 791, 146, 791 ], "score": 0.91, "latex": "C(p,\\bar{p})\\gets\\frac{(1-\\lambda)\\cdot C(p,\\bar{p})+\\lambda\\cdot w_{t}(p,p_{t-1})\\cdot C_{a}(p,\\bar{p})}{(1-\\lambda)+\\lambda\\cdot w_{t}(p,p_{t-1})},", "bbox": [ 52, 257, 283, 284 ] }, { "category_id": 13, "poly": [ 1095, 231, 1134, 231, 1134, 270, 1095, 270 ], "score": 0.9, "latex": "D_{p}^{i}", "bbox": [ 394, 83, 408, 97 ] }, { "category_id": 13, "poly": [ 1313, 1752, 1447, 1752, 1447, 1783, 1313, 1783 ], "score": 0.89, "latex": "640~\\times~480", "bbox": [ 472, 630, 520, 641 ] }, { "category_id": 13, "poly": [ 593, 1782, 627, 1782, 627, 1815, 593, 1815 ], "score": 0.89, "latex": "F_{p}", "bbox": [ 213, 641, 225, 653 ] }, { "category_id": 13, "poly": [ 133, 326, 209, 326, 209, 355, 133, 355 ], "score": 0.88, "latex": "\\omega\\times\\omega", "bbox": [ 47, 117, 75, 127 ] }, { "category_id": 13, "poly": [ 208, 1089, 236, 1089, 236, 1116, 208, 1116 ], "score": 0.85, "latex": "\\gamma_{t}", "bbox": [ 74, 392, 84, 401 ] }, { "category_id": 13, "poly": [ 1466, 769, 1484, 769, 1484, 797, 1466, 797 ], "score": 0.83, "latex": "\\bar{p}", "bbox": [ 527, 276, 534, 286 ] }, { "category_id": 13, "poly": [ 133, 935, 177, 935, 177, 963, 133, 963 ], "score": 0.83, "latex": "p_{t-1}", "bbox": [ 47, 336, 63, 346 ] }, { "category_id": 13, "poly": [ 608, 1753, 627, 1753, 627, 1779, 608, 1779 ], "score": 0.81, "latex": "p", "bbox": [ 218, 631, 225, 640 ] }, { "category_id": 13, "poly": [ 491, 799, 511, 799, 511, 825, 491, 825 ], "score": 0.81, "latex": "\\lambda", "bbox": [ 176, 287, 183, 297 ] }, { "category_id": 13, "poly": [ 1018, 770, 1037, 770, 1037, 796, 1018, 796 ], "score": 0.81, "latex": "p", "bbox": [ 366, 277, 373, 286 ] }, { "category_id": 13, "poly": [ 1086, 470, 1107, 470, 1107, 491, 1086, 491 ], "score": 0.8, "latex": "\\alpha", "bbox": [ 390, 169, 398, 176 ] }, { "category_id": 13, "poly": [ 466, 901, 485, 901, 485, 929, 466, 929 ], "score": 0.8, "latex": "p", "bbox": [ 167, 324, 174, 334 ] }, { "category_id": 13, "poly": [ 208, 484, 227, 484, 227, 511, 208, 511 ], "score": 0.79, "latex": "p", "bbox": [ 74, 174, 81, 183 ] }, { "category_id": 13, "poly": [ 462, 1443, 480, 1443, 480, 1468, 462, 1468 ], "score": 0.77, "latex": "p", "bbox": [ 166, 519, 172, 528 ] }, { "category_id": 13, "poly": [ 266, 514, 288, 514, 288, 544, 266, 544 ], "score": 0.77, "latex": "\\bar{p}", "bbox": [ 95, 185, 103, 195 ] }, { "category_id": 13, "poly": [ 816, 1716, 836, 1716, 836, 1746, 816, 1746 ], "score": 0.73, "latex": "\\bar{p}", "bbox": [ 293, 617, 300, 628 ] }, { "category_id": 13, "poly": [ 132, 405, 154, 405, 154, 432, 132, 432 ], "score": 0.27, "latex": "B", "bbox": [ 47, 145, 55, 155 ] }, { "category_id": 13, "poly": [ 862, 160, 887, 160, 887, 187, 862, 187 ], "score": 0.26, "latex": "D", "bbox": [ 310, 57, 319, 67 ] }, { "category_id": 15, "poly": [ 887.0, 852.0, 1568.0, 855.0, 1568.0, 894.0, 887.0, 891.0 ], "score": 0.98, "text": " The speed and accuracy of real-time stereo matching al-", "bbox": [ 319, 306, 564, 321 ] }, { "category_id": 15, "poly": [ 864.0, 891.0, 1566.0, 891.0, 1566.0, 924.0, 864.0, 924.0 ], "score": 0.99, "text": "gorithms are traditionally demonstrated using still-frame im-", "bbox": [ 311, 320, 563, 332 ] }, { "category_id": 15, "poly": [ 859.0, 921.0, 1571.0, 919.0, 1571.0, 958.0, 859.0, 960.0 ], "score": 0.97, "text": " ages from the Middlebury stereo benchmark [1], [2]. Still", "bbox": [ 309, 331, 565, 344 ] }, { "category_id": 15, "poly": [ 862.0, 956.0, 1568.0, 958.0, 1568.0, 990.0, 862.0, 988.0 ], "score": 0.99, "text": "frames, however, are insufficient for evaluating stereo match-", "bbox": [ 310, 344, 564, 356 ] }, { "category_id": 15, "poly": [ 864.0, 992.0, 1571.0, 992.0, 1571.0, 1024.0, 864.0, 1024.0 ], "score": 1.0, "text": "ing algorithms that incorporate frame-to-frame prediction to", "bbox": [ 311, 357, 565, 368 ] }, { "category_id": 15, "poly": [ 864.0, 1027.0, 1568.0, 1027.0, 1568.0, 1059.0, 864.0, 1059.0 ], "score": 0.97, "text": "enhance matching accuracy. An alternative approach is to", "bbox": [ 311, 369, 564, 381 ] }, { "category_id": 15, "poly": [ 864.0, 1059.0, 1566.0, 1059.0, 1566.0, 1089.0, 864.0, 1089.0 ], "score": 0.99, "text": "use a stereo video sequence with a ground truth disparity", "bbox": [ 311, 381, 563, 392 ] }, { "category_id": 15, "poly": [ 862.0, 1091.0, 1566.0, 1091.0, 1566.0, 1123.0, 862.0, 1123.0 ], "score": 1.0, "text": "for each frame. Obtaining the ground truth disparity of real", "bbox": [ 310, 392, 563, 404 ] }, { "category_id": 15, "poly": [ 866.0, 1125.0, 1566.0, 1125.0, 1566.0, 1157.0, 866.0, 1157.0 ], "score": 0.98, "text": "world video sequences is a difficult undertaking due to the", "bbox": [ 311, 405, 563, 416 ] }, { "category_id": 15, "poly": [ 859.0, 1153.0, 1568.0, 1155.0, 1568.0, 1194.0, 859.0, 1192.0 ], "score": 0.99, "text": "high frame rate of video and limitations in depth sensing-", "bbox": [ 309, 415, 564, 429 ] }, { "category_id": 15, "poly": [ 864.0, 1192.0, 1568.0, 1192.0, 1568.0, 1224.0, 864.0, 1224.0 ], "score": 0.99, "text": "technology. To address the need for stereo video with ground", "bbox": [ 311, 429, 564, 440 ] }, { "category_id": 15, "poly": [ 864.0, 1224.0, 1568.0, 1224.0, 1568.0, 1256.0, 864.0, 1256.0 ], "score": 0.99, "text": "truth disparities, five pairs of synthetic stereo video sequences", "bbox": [ 311, 440, 564, 452 ] }, { "category_id": 15, "poly": [ 864.0, 1258.0, 1568.0, 1258.0, 1568.0, 1290.0, 864.0, 1290.0 ], "score": 0.99, "text": "of a computer-generated scene were given in [19]. While these", "bbox": [ 311, 452, 564, 464 ] }, { "category_id": 15, "poly": [ 864.0, 1290.0, 1566.0, 1290.0, 1566.0, 1322.0, 864.0, 1322.0 ], "score": 1.0, "text": "videos incorporate a sufficient amount of movement variation,", "bbox": [ 311, 464, 563, 475 ] }, { "category_id": 15, "poly": [ 862.0, 1325.0, 1568.0, 1325.0, 1568.0, 1357.0, 862.0, 1357.0 ], "score": 0.99, "text": "they were generated from relatively simple models using low-", "bbox": [ 310, 477, 564, 488 ] }, { "category_id": 15, "poly": [ 862.0, 1359.0, 1571.0, 1359.0, 1571.0, 1389.0, 862.0, 1389.0 ], "score": 0.99, "text": "resolution rendering, and they do not provide occlusion or", "bbox": [ 310, 489, 565, 500 ] }, { "category_id": 15, "poly": [ 862.0, 1386.0, 1088.0, 1394.0, 1087.0, 1426.0, 861.0, 1418.0 ], "score": 0.98, "text": "discontinuity maps.", "bbox": [ 310, 498, 391, 513 ] }, { "category_id": 15, "poly": [ 129.0, 156.0, 839.0, 158.0, 838.0, 197.0, 129.0, 195.0 ], "score": 0.99, "text": "the matching cost by performing two-pass aggregation using", "bbox": [ 46, 56, 301, 70 ] }, { "category_id": 15, "poly": [ 130.0, 188.0, 841.0, 193.0, 841.0, 229.0, 129.0, 225.0 ], "score": 0.98, "text": "two orthogonal 1D windows [5], [6], [8]. The two-pass method ", "bbox": [ 46, 67, 302, 82 ] }, { "category_id": 15, "poly": [ 129.0, 225.0, 841.0, 222.0, 841.0, 261.0, 129.0, 264.0 ], "score": 0.99, "text": "first aggregates matching costs in the vertical direction, and", "bbox": [ 46, 81, 302, 93 ] }, { "category_id": 15, "poly": [ 134.0, 261.0, 838.0, 261.0, 838.0, 293.0, 134.0, 293.0 ], "score": 0.99, "text": "then computes a weighted sum of the aggregated costs in the", "bbox": [ 48, 93, 301, 105 ] }, { "category_id": 15, "poly": [ 132.0, 291.0, 838.0, 291.0, 838.0, 330.0, 132.0, 330.0 ], "score": 0.99, "text": "horizontal direction. Given that support regions are of size", "bbox": [ 47, 104, 301, 118 ] }, { "category_id": 15, "poly": [ 136.0, 360.0, 336.0, 360.0, 336.0, 392.0, 136.0, 392.0 ], "score": 0.99, "text": "aggregation from", "bbox": [ 48, 129, 120, 141 ] }, { "category_id": 15, "poly": [ 415.0, 360.0, 452.0, 360.0, 452.0, 392.0, 415.0, 392.0 ], "score": 0.98, "text": "to", "bbox": [ 149, 129, 162, 141 ] }, { "category_id": 15, "poly": [ 210.0, 321.0, 836.0, 321.0, 836.0, 360.0, 210.0, 360.0 ], "score": 0.98, "text": ", the two-pass method reduces the complexity of cost", "bbox": [ 75, 115, 300, 129 ] }, { "category_id": 15, "poly": [ 887.0, 1416.0, 1571.0, 1419.0, 1571.0, 1458.0, 887.0, 1455.0 ], "score": 0.98, "text": " To evaluate the performance of temporal aggregation, a", "bbox": [ 319, 509, 565, 524 ] }, { "category_id": 15, "poly": [ 862.0, 1453.0, 1566.0, 1453.0, 1566.0, 1485.0, 862.0, 1485.0 ], "score": 0.98, "text": "new synthetic stereo video sequence is introduced along with", "bbox": [ 310, 523, 563, 534 ] }, { "category_id": 15, "poly": [ 862.0, 1490.0, 1566.0, 1487.0, 1566.0, 1519.0, 862.0, 1522.0 ], "score": 0.99, "text": "corresponding disparity maps, occlusion maps, and disconti-", "bbox": [ 310, 536, 563, 546 ] }, { "category_id": 15, "poly": [ 862.0, 1519.0, 1571.0, 1519.0, 1571.0, 1558.0, 862.0, 1558.0 ], "score": 0.99, "text": "nuity maps for evaluating the performance of temporal stereo", "bbox": [ 310, 546, 565, 560 ] }, { "category_id": 15, "poly": [ 864.0, 1556.0, 1568.0, 1556.0, 1568.0, 1588.0, 864.0, 1588.0 ], "score": 1.0, "text": "matching algorithms. To create the video sequence, a complex", "bbox": [ 311, 560, 564, 571 ] }, { "category_id": 15, "poly": [ 864.0, 1590.0, 1568.0, 1590.0, 1568.0, 1620.0, 864.0, 1620.0 ], "score": 0.99, "text": "scene was constructed using Google Sketchup and a pair", "bbox": [ 311, 572, 564, 583 ] }, { "category_id": 15, "poly": [ 864.0, 1622.0, 1568.0, 1622.0, 1568.0, 1655.0, 864.0, 1655.0 ], "score": 0.99, "text": "of animated paths were rendered photorealistically using the", "bbox": [ 311, 583, 564, 595 ] }, { "category_id": 15, "poly": [ 859.0, 1650.0, 1571.0, 1652.0, 1571.0, 1691.0, 859.0, 1689.0 ], "score": 0.99, "text": " Kerkythea rendering software. Realistic material properties", "bbox": [ 309, 594, 565, 608 ] }, { "category_id": 15, "poly": [ 864.0, 1689.0, 1566.0, 1689.0, 1566.0, 1721.0, 864.0, 1721.0 ], "score": 1.0, "text": "were used to give surfaces a natural-looking appearance by", "bbox": [ 311, 608, 563, 619 ] }, { "category_id": 15, "poly": [ 864.0, 1723.0, 1566.0, 1723.0, 1566.0, 1755.0, 864.0, 1755.0 ], "score": 0.98, "text": "adjusting their specularity, reflectance, and diffusion. The", "bbox": [ 311, 620, 563, 631 ] }, { "category_id": 15, "poly": [ 864.0, 1788.0, 1568.0, 1788.0, 1568.0, 1820.0, 864.0, 1820.0 ], "score": 1.0, "text": "frame rate of 30 frames per second, and a duration of 4", "bbox": [ 311, 643, 564, 655 ] }, { "category_id": 15, "poly": [ 862.0, 1817.0, 1568.0, 1820.0, 1568.0, 1859.0, 861.0, 1856.0 ], "score": 0.98, "text": "seconds. In addition to performing photorealistic rendering.", "bbox": [ 310, 654, 564, 669 ] }, { "category_id": 15, "poly": [ 864.0, 1856.0, 1568.0, 1856.0, 1568.0, 1888.0, 864.0, 1888.0 ], "score": 0.99, "text": "depth renders of both video sequences were also generated and", "bbox": [ 311, 668, 564, 679 ] }, { "category_id": 15, "poly": [ 864.0, 1888.0, 1566.0, 1888.0, 1566.0, 1920.0, 864.0, 1920.0 ], "score": 0.98, "text": "converted to ground truth disparity for the stereo video. The", "bbox": [ 311, 679, 563, 691 ] }, { "category_id": 15, "poly": [ 862.0, 1920.0, 1564.0, 1920.0, 1564.0, 1952.0, 862.0, 1952.0 ], "score": 0.99, "text": "video sequences and ground truth data have been made avail-", "bbox": [ 310, 691, 563, 702 ] }, { "category_id": 15, "poly": [ 862.0, 1950.0, 1566.0, 1953.0, 1566.0, 1985.0, 862.0, 1982.0 ], "score": 0.99, "text": "able at http://mc2.unl.edu/current-research", "bbox": [ 310, 702, 563, 714 ] }, { "category_id": 15, "poly": [ 866.0, 1989.0, 1566.0, 1989.0, 1566.0, 2019.0, 866.0, 2019.0 ], "score": 0.98, "text": "/ image-processing/. Figure 2 shows two sample frames", "bbox": [ 311, 716, 563, 726 ] }, { "category_id": 15, "poly": [ 862.0, 1755.0, 1312.0, 1755.0, 1312.0, 1788.0, 862.0, 1788.0 ], "score": 0.97, "text": "video sequence has a resolution of ", "bbox": [ 310, 631, 472, 643 ] }, { "category_id": 15, "poly": [ 1448.0, 1755.0, 1566.0, 1755.0, 1566.0, 1788.0, 1448.0, 1788.0 ], "score": 0.99, "text": "pixels,a", "bbox": [ 521, 631, 563, 643 ] }, { "category_id": 15, "poly": [ 889.0, 197.0, 1566.0, 199.0, 1566.0, 238.0, 889.0, 236.0 ], "score": 1.0, "text": "Once the first iteration of stereo matching is complete,", "bbox": [ 320, 70, 563, 85 ] }, { "category_id": 15, "poly": [ 864.0, 268.0, 1566.0, 268.0, 1566.0, 300.0, 864.0, 300.0 ], "score": 0.99, "text": "subsequent iterations. This is done by penalizing disparities", "bbox": [ 311, 96, 563, 108 ] }, { "category_id": 15, "poly": [ 864.0, 302.0, 1568.0, 302.0, 1568.0, 335.0, 864.0, 335.0 ], "score": 1.0, "text": "that deviate from their expected values. The penalty function", "bbox": [ 311, 108, 564, 120 ] }, { "category_id": 15, "poly": [ 862.0, 337.0, 996.0, 337.0, 996.0, 369.0, 862.0, 369.0 ], "score": 0.97, "text": "is given by", "bbox": [ 310, 121, 358, 132 ] }, { "category_id": 15, "poly": [ 864.0, 236.0, 1094.0, 236.0, 1094.0, 268.0, 864.0, 268.0 ], "score": 0.96, "text": "disparityestimates", "bbox": [ 311, 84, 393, 96 ] }, { "category_id": 15, "poly": [ 1135.0, 236.0, 1568.0, 236.0, 1568.0, 268.0, 1135.0, 268.0 ], "score": 0.97, "text": " can be used to guide matching in", "bbox": [ 408, 84, 564, 96 ] }, { "category_id": 15, "poly": [ 157.0, 1366.0, 839.0, 1368.0, 838.0, 1407.0, 157.0, 1405.0 ], "score": 1.0, "text": "Having performed temporal cost aggregation, matches are", "bbox": [ 56, 491, 301, 506 ] }, { "category_id": 15, "poly": [ 134.0, 1405.0, 834.0, 1405.0, 834.0, 1437.0, 134.0, 1437.0 ], "score": 0.99, "text": "determined using the Winner-Takes-All (WTA) match selec-", "bbox": [ 48, 505, 300, 517 ] }, { "category_id": 15, "poly": [ 132.0, 1506.0, 374.0, 1506.0, 374.0, 1538.0, 132.0, 1538.0 ], "score": 1.0, "text": "cost, and is given by", "bbox": [ 47, 542, 134, 553 ] }, { "category_id": 15, "poly": [ 692.0, 1439.0, 834.0, 1439.0, 834.0, 1471.0, 692.0, 1471.0 ], "score": 0.99, "text": ", is the can-", "bbox": [ 249, 518, 300, 529 ] }, { "category_id": 15, "poly": [ 134.0, 1474.0, 276.0, 1474.0, 276.0, 1506.0, 134.0, 1506.0 ], "score": 0.98, "text": "didate pixel", "bbox": [ 48, 530, 99, 542 ] }, { "category_id": 15, "poly": [ 362.0, 1474.0, 836.0, 1474.0, 836.0, 1506.0, 362.0, 1506.0 ], "score": 0.99, "text": " characterized by the minimum matching", "bbox": [ 130, 530, 300, 542 ] }, { "category_id": 15, "poly": [ 134.0, 1439.0, 461.0, 1439.0, 461.0, 1471.0, 134.0, 1471.0 ], "score": 1.0, "text": "tion criteria. The match for", "bbox": [ 48, 518, 165, 529 ] }, { "category_id": 15, "poly": [ 481.0, 1439.0, 628.0, 1439.0, 628.0, 1471.0, 481.0, 1471.0 ], "score": 0.96, "text": ", denoted as", "bbox": [ 173, 518, 226, 529 ] }, { "category_id": 15, "poly": [ 134.0, 548.0, 838.0, 545.0, 838.0, 577.0, 134.0, 580.0 ], "score": 0.99, "text": "aggregation routine is exectuted. At each time instance, the", "bbox": [ 48, 197, 301, 207 ] }, { "category_id": 15, "poly": [ 134.0, 614.0, 834.0, 614.0, 834.0, 646.0, 134.0, 646.0 ], "score": 1.0, "text": "weighted summation of costs obtained in the previous frames.", "bbox": [ 48, 221, 300, 232 ] }, { "category_id": 15, "poly": [ 132.0, 646.0, 838.0, 644.0, 838.0, 676.0, 132.0, 678.0 ], "score": 1.0, "text": "During temporal aggregation, the auxiliary cost is merged with", "bbox": [ 47, 232, 301, 243 ] }, { "category_id": 15, "poly": [ 132.0, 678.0, 675.0, 681.0, 674.0, 713.0, 132.0, 710.0 ], "score": 0.99, "text": "the cost obtained from the current frame using", "bbox": [ 47, 244, 242, 256 ] }, { "category_id": 15, "poly": [ 134.0, 580.0, 549.0, 580.0, 549.0, 612.0, 134.0, 612.0 ], "score": 1.0, "text": "algorithm stores an auxiliary cost", "bbox": [ 48, 208, 197, 220 ] }, { "category_id": 15, "poly": [ 649.0, 580.0, 841.0, 580.0, 841.0, 612.0, 649.0, 612.0 ], "score": 0.96, "text": "which holds a", "bbox": [ 233, 208, 302, 220 ] }, { "category_id": 15, "poly": [ 157.0, 445.0, 425.0, 442.0, 425.0, 481.0, 157.0, 484.0 ], "score": 0.98, "text": " Once aggregated costs", "bbox": [ 56, 160, 153, 173 ] }, { "category_id": 15, "poly": [ 513.0, 445.0, 838.0, 442.0, 838.0, 481.0, 513.0, 484.0 ], "score": 0.96, "text": " have been computed for all", "bbox": [ 184, 160, 301, 173 ] }, { "category_id": 15, "poly": [ 132.0, 481.0, 207.0, 481.0, 207.0, 513.0, 132.0, 513.0 ], "score": 1.0, "text": "pixels", "bbox": [ 47, 173, 74, 184 ] }, { "category_id": 15, "poly": [ 228.0, 481.0, 838.0, 481.0, 838.0, 513.0, 228.0, 513.0 ], "score": 0.97, "text": " in the reference image and their respective matching", "bbox": [ 82, 173, 301, 184 ] }, { "category_id": 15, "poly": [ 134.0, 516.0, 265.0, 516.0, 265.0, 548.0, 134.0, 548.0 ], "score": 1.0, "text": "candidates", "bbox": [ 48, 185, 95, 197 ] }, { "category_id": 15, "poly": [ 289.0, 516.0, 838.0, 516.0, 838.0, 548.0, 289.0, 548.0 ], "score": 0.98, "text": " in the target image, a single-pass temporal", "bbox": [ 104, 185, 301, 197 ] }, { "category_id": 15, "poly": [ 132.0, 1116.0, 841.0, 1116.0, 841.0, 1155.0, 132.0, 1155.0 ], "score": 0.99, "text": "in the temporal dimension. The temporal adaptive weight has ", "bbox": [ 47, 401, 302, 415 ] }, { "category_id": 15, "poly": [ 134.0, 1153.0, 838.0, 1153.0, 838.0, 1185.0, 134.0, 1185.0 ], "score": 0.99, "text": "the effect of preserving edges in the temporal domain, such", "bbox": [ 48, 415, 301, 426 ] }, { "category_id": 15, "poly": [ 132.0, 1182.0, 836.0, 1182.0, 836.0, 1215.0, 132.0, 1215.0 ], "score": 0.98, "text": "that when a pixel coordinate transitions from one side of an", "bbox": [ 47, 425, 300, 437 ] }, { "category_id": 15, "poly": [ 134.0, 1219.0, 838.0, 1219.0, 838.0, 1251.0, 134.0, 1251.0 ], "score": 0.98, "text": "edge to another in subsequent frames, the auxiliary cost is", "bbox": [ 48, 438, 301, 450 ] }, { "category_id": 15, "poly": [ 134.0, 1254.0, 838.0, 1254.0, 838.0, 1283.0, 134.0, 1283.0 ], "score": 0.99, "text": "assigned a small weight and the majority of the cost is derived", "bbox": [ 48, 451, 301, 461 ] }, { "category_id": 15, "poly": [ 130.0, 1283.0, 404.0, 1286.0, 404.0, 1318.0, 129.0, 1315.0 ], "score": 1.0, "text": "from the current frame.", "bbox": [ 46, 461, 145, 474 ] }, { "category_id": 15, "poly": [ 134.0, 1086.0, 207.0, 1086.0, 207.0, 1118.0, 134.0, 1118.0 ], "score": 0.99, "text": "where", "bbox": [ 48, 390, 74, 402 ] }, { "category_id": 15, "poly": [ 237.0, 1086.0, 836.0, 1086.0, 836.0, 1118.0, 237.0, 1118.0 ], "score": 0.99, "text": "regulates the strength of grouping by color similarity", "bbox": [ 85, 390, 300, 402 ] }, { "category_id": 15, "poly": [ 864.0, 600.0, 1568.0, 600.0, 1568.0, 632.0, 864.0, 632.0 ], "score": 1.0, "text": "and the matches are reselected using the WTA match selection", "bbox": [ 311, 216, 564, 227 ] }, { "category_id": 15, "poly": [ 864.0, 635.0, 1568.0, 635.0, 1568.0, 667.0, 864.0, 667.0 ], "score": 0.99, "text": "criteria. The resulting disparity maps are then post-processed", "bbox": [ 311, 228, 564, 240 ] }, { "category_id": 15, "poly": [ 864.0, 669.0, 1564.0, 669.0, 1564.0, 699.0, 864.0, 699.0 ], "score": 0.98, "text": "using a combination of median filtering and occlusion filling.", "bbox": [ 311, 240, 563, 251 ] }, { "category_id": 15, "poly": [ 864.0, 701.0, 1566.0, 701.0, 1566.0, 731.0, 864.0, 731.0 ], "score": 0.98, "text": "Finally, the current cost becomes the auxiliary cost for the next", "bbox": [ 311, 252, 563, 263 ] }, { "category_id": 15, "poly": [ 862.0, 731.0, 1340.0, 731.0, 1340.0, 770.0, 862.0, 770.0 ], "score": 0.99, "text": "pair of frames in the video sequence, i.e.,", "bbox": [ 310, 263, 482, 277 ] }, { "category_id": 15, "poly": [ 864.0, 768.0, 1017.0, 768.0, 1017.0, 800.0, 864.0, 800.0 ], "score": 1.0, "text": "for all pixels", "bbox": [ 311, 276, 366, 288 ] }, { "category_id": 15, "poly": [ 1038.0, 768.0, 1465.0, 768.0, 1465.0, 800.0, 1038.0, 800.0 ], "score": 0.98, "text": " in the and their matching candidates", "bbox": [ 373, 276, 527, 288 ] }, { "category_id": 15, "poly": [ 864.0, 502.0, 1427.0, 502.0, 1427.0, 532.0, 864.0, 532.0 ], "score": 1.0, "text": "values are incorporated into the matching cost as", "bbox": [ 311, 180, 513, 191 ] }, { "category_id": 15, "poly": [ 864.0, 468.0, 1085.0, 468.0, 1085.0, 500.0, 864.0, 500.0 ], "score": 0.96, "text": "where the value of", "bbox": [ 311, 168, 390, 180 ] }, { "category_id": 15, "poly": [ 1108.0, 468.0, 1564.0, 468.0, 1564.0, 500.0, 1108.0, 500.0 ], "score": 0.99, "text": "is chosen empirically. Next, the penalty", "bbox": [ 398, 168, 563, 180 ] }, { "category_id": 15, "poly": [ 134.0, 866.0, 838.0, 866.0, 838.0, 898.0, 134.0, 898.0 ], "score": 0.99, "text": "temporal domain. The temporal adaptive weight computed", "bbox": [ 48, 311, 301, 323 ] }, { "category_id": 15, "poly": [ 132.0, 967.0, 263.0, 967.0, 263.0, 999.0, 132.0, 999.0 ], "score": 0.93, "text": "is given by", "bbox": [ 47, 348, 94, 359 ] }, { "category_id": 15, "poly": [ 134.0, 834.0, 320.0, 834.0, 320.0, 866.0, 134.0, 866.0 ], "score": 0.97, "text": "smoothing and", "bbox": [ 48, 300, 115, 311 ] }, { "category_id": 15, "poly": [ 444.0, 834.0, 836.0, 834.0, 836.0, 866.0, 444.0, 866.0 ], "score": 0.92, "text": " enforces color similarity in the", "bbox": [ 159, 300, 300, 311 ] }, { "category_id": 15, "poly": [ 178.0, 930.0, 838.0, 928.0, 839.0, 967.0, 178.0, 969.0 ], "score": 0.99, "text": ", located at the same spatial coordinate in the prior frame,", "bbox": [ 64, 334, 302, 348 ] }, { "category_id": 15, "poly": [ 132.0, 795.0, 490.0, 800.0, 490.0, 832.0, 132.0, 827.0 ], "score": 0.99, "text": "where the feedback coefficient", "bbox": [ 47, 286, 176, 299 ] }, { "category_id": 15, "poly": [ 512.0, 795.0, 836.0, 800.0, 836.0, 832.0, 512.0, 827.0 ], "score": 0.97, "text": " controls the amount of cost", "bbox": [ 184, 286, 300, 299 ] }, { "category_id": 15, "poly": [ 136.0, 898.0, 465.0, 898.0, 465.0, 930.0, 136.0, 930.0 ], "score": 0.99, "text": "between the pixel of interest", "bbox": [ 48, 323, 167, 334 ] }, { "category_id": 15, "poly": [ 486.0, 898.0, 838.0, 898.0, 838.0, 930.0, 486.0, 930.0 ], "score": 1.0, "text": "in the current frame and pixel", "bbox": [ 174, 323, 301, 334 ] }, { "category_id": 15, "poly": [ 159.0, 1616.0, 836.0, 1616.0, 836.0, 1648.0, 159.0, 1648.0 ], "score": 0.99, "text": "To asses the level of confidence associated with selecting", "bbox": [ 57, 581, 300, 593 ] }, { "category_id": 15, "poly": [ 132.0, 1648.0, 836.0, 1650.0, 836.0, 1682.0, 132.0, 1680.0 ], "score": 1.0, "text": "minimum cost matches, the algorithm determines another set", "bbox": [ 47, 593, 300, 605 ] }, { "category_id": 15, "poly": [ 134.0, 1684.0, 838.0, 1684.0, 838.0, 1716.0, 134.0, 1716.0 ], "score": 1.0, "text": "of matches, this time from the target to reference image, and", "bbox": [ 48, 606, 301, 617 ] }, { "category_id": 15, "poly": [ 134.0, 1783.0, 182.0, 1783.0, 182.0, 1815.0, 134.0, 1815.0 ], "score": 1.0, "text": "and", "bbox": [ 48, 641, 65, 653 ] }, { "category_id": 15, "poly": [ 136.0, 1714.0, 580.0, 1714.0, 580.0, 1746.0, 136.0, 1746.0 ], "score": 0.98, "text": "verifies if the results agree. Given that", "bbox": [ 48, 617, 208, 628 ] }, { "category_id": 15, "poly": [ 305.0, 1783.0, 592.0, 1783.0, 592.0, 1815.0, 305.0, 1815.0 ], "score": 0.99, "text": ", the confidence measure", "bbox": [ 109, 641, 213, 653 ] }, { "category_id": 15, "poly": [ 628.0, 1783.0, 811.0, 1783.0, 811.0, 1815.0, 628.0, 1815.0 ], "score": 0.97, "text": "is computed as", "bbox": [ 226, 641, 291, 653 ] }, { "category_id": 15, "poly": [ 132.0, 1746.0, 607.0, 1751.0, 607.0, 1783.0, 132.0, 1778.0 ], "score": 1.0, "text": "in the right image is the match for pixel", "bbox": [ 47, 628, 218, 641 ] }, { "category_id": 15, "poly": [ 628.0, 1746.0, 836.0, 1751.0, 836.0, 1783.0, 628.0, 1778.0 ], "score": 0.98, "text": "in the left image,", "bbox": [ 226, 628, 300, 641 ] }, { "category_id": 15, "poly": [ 695.0, 1714.0, 815.0, 1714.0, 815.0, 1746.0, 695.0, 1746.0 ], "score": 0.99, "text": ", i.e. pixel", "bbox": [ 250, 617, 293, 628 ] }, { "category_id": 15, "poly": [ 1132.0, 814.0, 1298.0, 814.0, 1298.0, 852.0, 1132.0, 852.0 ], "score": 1.0, "text": "IV. RESULTS", "bbox": [ 407, 293, 467, 306 ] }, { "category_id": 15, "poly": [ 155.0, 401.0, 481.0, 406.0, 480.0, 445.0, 155.0, 440.0 ], "score": 0.99, "text": "Temporal cost aggregation", "bbox": [ 55, 144, 172, 160 ] }, { "category_id": 15, "poly": [ 129.0, 1325.0, 718.0, 1327.0, 718.0, 1366.0, 129.0, 1363.0 ], "score": 0.99, "text": "C. Disparity Selection and Confidence Assessment", "bbox": [ 46, 477, 258, 491 ] }, { "category_id": 15, "poly": [ 888.0, 158.0, 1252.0, 158.0, 1252.0, 197.0, 888.0, 197.0 ], "score": 0.97, "text": "Iterative Disparity Refinement", "bbox": [ 319, 56, 450, 70 ] } ], "page_info": { "page_no": 2, "height": 2200, "width": 1700 } }, { "layout_dets": [ { "category_id": 1, "poly": [ 133.2669677734375, 156.7020721435547, 840.6729125976562, 156.7020721435547, 840.6729125976562, 257.75836181640625, 133.2669677734375, 257.75836181640625 ], "score": 0.9999951124191284, "bbox": [ 47, 56, 302, 92 ] }, { "category_id": 3, "poly": [ 866.177734375, 171.2958526611328, 1510.944580078125, 171.2958526611328, 1510.944580078125, 848.8190307617188, 866.177734375, 848.8190307617188 ], "score": 0.9999942779541016, "bbox": [ 311, 61, 543, 305 ] }, { "category_id": 1, "poly": [ 131.3756561279297, 1520.5887451171875, 838.545166015625, 1520.5887451171875, 838.545166015625, 1885.353515625, 131.3756561279297, 1885.353515625 ], "score": 0.9999925494194031, "bbox": [ 47, 547, 301, 678 ] }, { "category_id": 4, "poly": [ 131.56919860839844, 1352.6187744140625, 840.1758422851562, 1352.6187744140625, 840.1758422851562, 1490.513671875, 131.56919860839844, 1490.513671875 ], "score": 0.9999915361404419, "bbox": [ 47, 486, 302, 536 ] }, { "category_id": 1, "poly": [ 132.41786193847656, 1886.0615234375, 838.675537109375, 1886.0615234375, 838.675537109375, 2019.347412109375, 132.41786193847656, 2019.347412109375 ], "score": 0.9999526739120483, "bbox": [ 47, 678, 301, 726 ] }, { "category_id": 3, "poly": [ 136.71240234375, 278.259765625, 816.1984252929688, 278.259765625, 816.1984252929688, 1348.5758056640625, 136.71240234375, 1348.5758056640625 ], "score": 0.9999439120292664, "bbox": [ 49, 100, 293, 485 ] }, { "category_id": 1, "poly": [ 863.4852905273438, 1917.056884765625, 1569.6337890625, 1917.056884765625, 1569.6337890625, 2020.57421875, 863.4852905273438, 2020.57421875 ], "score": 0.9999344348907471, "bbox": [ 310, 690, 565, 727 ] }, { "category_id": 4, "poly": [ 861.7813720703125, 1749.4459228515625, 1567.659912109375, 1749.4459228515625, 1567.659912109375, 1852.389892578125, 861.7813720703125, 1852.389892578125 ], "score": 0.9986151456832886, "bbox": [ 310, 629, 564, 666 ] }, { "category_id": 3, "poly": [ 874.6467895507812, 1536.7642822265625, 1506.6514892578125, 1536.7642822265625, 1506.6514892578125, 1734.9659423828125, 874.6467895507812, 1734.9659423828125 ], "score": 0.9940656423568726, "bbox": [ 314, 553, 542, 624 ] }, { "category_id": 4, "poly": [ 859.3250122070312, 861.2320556640625, 1569.650634765625, 861.2320556640625, 1569.650634765625, 1033.0804443359375, 859.3250122070312, 1033.0804443359375 ], "score": 0.985899806022644, "bbox": [ 309, 310, 565, 371 ] }, { "category_id": 1, "poly": [ 861.6172485351562, 1064.186279296875, 1564.036865234375, 1064.186279296875, 1564.036865234375, 1135.5125732421875, 861.6172485351562, 1135.5125732421875 ], "score": 0.9128350019454956, "bbox": [ 310, 383, 563, 408 ] }, { "category_id": 3, "poly": [ 888.8074340820312, 1163.7965087890625, 1529.8028564453125, 1163.7965087890625, 1529.8028564453125, 1510.91162109375, 888.8074340820312, 1510.91162109375 ], "score": 0.7896175384521484, "bbox": [ 319, 418, 550, 543 ] }, { "category_id": 0, "poly": [ 1178.85791015625, 152.25347900390625, 1284.6339111328125, 152.25347900390625, 1284.6339111328125, 179.1011962890625, 1178.85791015625, 179.1011962890625 ], "score": 0.5732811689376831, "bbox": [ 424, 54, 462, 64 ] }, { "category_id": 13, "poly": [ 1295, 896, 1483, 896, 1483, 931, 1295, 931 ], "score": 0.93, "latex": "\\{\\pm0,\\pm20,\\pm40\\}", "bbox": [ 466, 322, 533, 335 ] }, { "category_id": 13, "poly": [ 481, 1919, 534, 1919, 534, 1949, 481, 1949 ], "score": 0.87, "latex": "\\pm20", "bbox": [ 173, 690, 192, 701 ] }, { "category_id": 13, "poly": [ 591, 1919, 644, 1919, 644, 1949, 591, 1949 ], "score": 0.87, "latex": "\\pm40", "bbox": [ 212, 690, 231, 701 ] }, { "category_id": 13, "poly": [ 1227, 1436, 1253, 1436, 1253, 1459, 1227, 1459 ], "score": 0.86, "latex": "\\gamma_{c}", "bbox": [ 441, 516, 451, 525 ] }, { "category_id": 13, "poly": [ 1295, 1436, 1323, 1436, 1323, 1461, 1295, 1461 ], "score": 0.85, "latex": "\\gamma_{g}", "bbox": [ 466, 516, 476, 525 ] }, { "category_id": 13, "poly": [ 133, 1588, 186, 1588, 186, 1618, 133, 1618 ], "score": 0.85, "latex": "\\pm20", "bbox": [ 47, 571, 66, 582 ] }, { "category_id": 13, "poly": [ 249, 1587, 302, 1587, 302, 1618, 249, 1618 ], "score": 0.84, "latex": "\\pm40", "bbox": [ 89, 571, 108, 582 ] }, { "category_id": 13, "poly": [ 787, 1555, 828, 1555, 828, 1585, 787, 1585 ], "score": 0.82, "latex": "\\pm0", "bbox": [ 283, 559, 298, 570 ] }, { "category_id": 13, "poly": [ 532, 1421, 572, 1421, 572, 1452, 532, 1452 ], "score": 0.81, "latex": "3^{\\mathrm{rd}}", "bbox": [ 191, 511, 205, 522 ] }, { "category_id": 13, "poly": [ 230, 1389, 266, 1389, 266, 1419, 230, 1419 ], "score": 0.8, "latex": "1^{\\mathrm{st}}", "bbox": [ 82, 500, 95, 510 ] }, { "category_id": 13, "poly": [ 655, 1986, 675, 1986, 675, 2013, 655, 2013 ], "score": 0.78, "latex": "\\lambda", "bbox": [ 235, 714, 243, 724 ] }, { "category_id": 13, "poly": [ 200, 1455, 240, 1455, 240, 1486, 200, 1486 ], "score": 0.75, "latex": "4^{\\mathrm{th}}", "bbox": [ 72, 523, 86, 534 ] }, { "category_id": 13, "poly": [ 954, 1255, 980, 1255, 980, 1275, 954, 1275 ], "score": 0.75, "latex": "\\gamma_{c}", "bbox": [ 343, 451, 352, 459 ] }, { "category_id": 13, "poly": [ 954, 1281, 980, 1281, 980, 1302, 954, 1302 ], "score": 0.74, "latex": "\\gamma_{g}", "bbox": [ 343, 461, 352, 468 ] }, { "category_id": 13, "poly": [ 959, 1227, 976, 1227, 976, 1245, 959, 1245 ], "score": 0.74, "latex": "\\tau", "bbox": [ 345, 441, 351, 448 ] }, { "category_id": 13, "poly": [ 960, 1352, 976, 1352, 976, 1372, 960, 1372 ], "score": 0.72, "latex": "k", "bbox": [ 345, 486, 351, 493 ] }, { "category_id": 13, "poly": [ 410, 1986, 430, 1986, 430, 2013, 410, 2013 ], "score": 0.7, "latex": "\\lambda", "bbox": [ 147, 714, 154, 724 ] }, { "category_id": 13, "poly": [ 955, 1331, 979, 1331, 979, 1351, 955, 1351 ], "score": 0.7, "latex": "\\gamma_{t}", "bbox": [ 343, 479, 352, 486 ] }, { "category_id": 13, "poly": [ 1489, 1752, 1510, 1752, 1510, 1778, 1489, 1778 ], "score": 0.69, "latex": "\\lambda", "bbox": [ 536, 630, 543, 640 ] }, { "category_id": 13, "poly": [ 1176, 965, 1195, 965, 1195, 992, 1176, 992 ], "score": 0.69, "latex": "\\lambda", "bbox": [ 423, 347, 430, 357 ] }, { "category_id": 13, "poly": [ 246, 1421, 289, 1421, 289, 1452, 246, 1452 ], "score": 0.69, "latex": "2^{\\mathrm{nd}}", "bbox": [ 88, 511, 104, 522 ] }, { "category_id": 13, "poly": [ 958, 1302, 977, 1302, 977, 1323, 958, 1323 ], "score": 0.63, "latex": "\\lambda", "bbox": [ 344, 468, 351, 476 ] }, { "category_id": 13, "poly": [ 959, 1380, 977, 1380, 977, 1397, 959, 1397 ], "score": 0.58, "latex": "\\alpha", "bbox": [ 345, 496, 351, 502 ] }, { "category_id": 13, "poly": [ 436, 1621, 455, 1621, 455, 1648, 436, 1648 ], "score": 0.58, "latex": "\\lambda", "bbox": [ 156, 583, 163, 593 ] }, { "category_id": 13, "poly": [ 959, 1204, 977, 1204, 977, 1219, 959, 1219 ], "score": 0.42, "latex": "\\omega", "bbox": [ 345, 433, 351, 438 ] }, { "category_id": 13, "poly": [ 870, 1592, 890, 1592, 890, 1617, 870, 1617 ], "score": 0.31, "latex": "\\lambda", "bbox": [ 313, 573, 320, 582 ] }, { "category_id": 15, "poly": [ 134.0, 160.0, 836.0, 160.0, 836.0, 192.0, 134.0, 192.0 ], "score": 0.99, "text": "of the synthetic stereo scene from a single camera perspective,", "bbox": [ 48, 57, 300, 69 ] }, { "category_id": 15, "poly": [ 134.0, 195.0, 838.0, 195.0, 838.0, 227.0, 134.0, 227.0 ], "score": 0.99, "text": "along with the ground truth disparity, occlusion map, and", "bbox": [ 48, 70, 301, 81 ] }, { "category_id": 15, "poly": [ 130.0, 222.0, 347.0, 230.0, 346.0, 264.0, 129.0, 256.0 ], "score": 0.99, "text": "discontinuity map.", "bbox": [ 46, 79, 124, 95 ] }, { "category_id": 15, "poly": [ 155.0, 1517.0, 841.0, 1519.0, 841.0, 1558.0, 155.0, 1556.0 ], "score": 0.99, "text": " The results of temporal stereo matching are given in Figure", "bbox": [ 55, 546, 302, 560 ] }, { "category_id": 15, "poly": [ 132.0, 1657.0, 838.0, 1657.0, 838.0, 1689.0, 132.0, 1689.0 ], "score": 0.99, "text": "stereo matching methods, improvements are negligible when", "bbox": [ 47, 596, 301, 608 ] }, { "category_id": 15, "poly": [ 132.0, 1691.0, 838.0, 1691.0, 838.0, 1723.0, 132.0, 1723.0 ], "score": 0.99, "text": "no noise is added to the images [10], [19]. This is largely due", "bbox": [ 47, 608, 301, 620 ] }, { "category_id": 15, "poly": [ 132.0, 1723.0, 836.0, 1723.0, 836.0, 1753.0, 132.0, 1753.0 ], "score": 0.98, "text": "to the fact that the video used to evaluate these methods is", "bbox": [ 47, 620, 300, 631 ] }, { "category_id": 15, "poly": [ 129.0, 1753.0, 838.0, 1751.0, 839.0, 1790.0, 129.0, 1792.0 ], "score": 0.99, "text": " computer generated with very little noise to start with, thus", "bbox": [ 46, 631, 302, 644 ] }, { "category_id": 15, "poly": [ 134.0, 1790.0, 836.0, 1790.0, 836.0, 1822.0, 134.0, 1822.0 ], "score": 0.99, "text": "the noise suppression achieved with temporal stereo matching", "bbox": [ 48, 644, 300, 655 ] }, { "category_id": 15, "poly": [ 132.0, 1817.0, 839.0, 1822.0, 838.0, 1859.0, 132.0, 1854.0 ], "score": 0.99, "text": "shows little to no improvement over methods that operate on", "bbox": [ 47, 654, 301, 669 ] }, { "category_id": 15, "poly": [ 130.0, 1856.0, 319.0, 1859.0, 318.0, 1891.0, 129.0, 1888.0 ], "score": 0.99, "text": "pairs of images.", "bbox": [ 46, 668, 114, 680 ] }, { "category_id": 15, "poly": [ 187.0, 1590.0, 248.0, 1590.0, 248.0, 1622.0, 187.0, 1622.0 ], "score": 0.87, "text": ",and", "bbox": [ 67, 572, 89, 583 ] }, { "category_id": 15, "poly": [ 303.0, 1590.0, 838.0, 1590.0, 838.0, 1622.0, 303.0, 1622.0 ], "score": 0.98, "text": ". Each performance plot is given as a function", "bbox": [ 109, 572, 301, 583 ] }, { "category_id": 15, "poly": [ 127.0, 1551.0, 786.0, 1554.0, 786.0, 1593.0, 127.0, 1590.0 ], "score": 0.98, "text": " 3 for uniform additive noise confined to the ranges of", "bbox": [ 45, 558, 282, 573 ] }, { "category_id": 15, "poly": [ 134.0, 1622.0, 435.0, 1622.0, 435.0, 1655.0, 134.0, 1655.0 ], "score": 0.99, "text": "of the feedback coefficient", "bbox": [ 48, 583, 156, 595 ] }, { "category_id": 15, "poly": [ 456.0, 1622.0, 836.0, 1622.0, 836.0, 1655.0, 456.0, 1655.0 ], "score": 0.97, "text": ". As with the majority of temporal", "bbox": [ 164, 583, 300, 595 ] }, { "category_id": 15, "poly": [ 134.0, 1359.0, 834.0, 1359.0, 834.0, 1391.0, 134.0, 1391.0 ], "score": 0.99, "text": "Figure 2: Two sample frames from the synthetic video se-", "bbox": [ 48, 489, 300, 500 ] }, { "category_id": 15, "poly": [ 573.0, 1418.0, 836.0, 1421.0, 836.0, 1460.0, 573.0, 1457.0 ], "score": 1.0, "text": "row), and discontinuity", "bbox": [ 206, 510, 300, 525 ] }, { "category_id": 15, "poly": [ 134.0, 1393.0, 229.0, 1393.0, 229.0, 1425.0, 134.0, 1425.0 ], "score": 0.96, "text": "quence (", "bbox": [ 48, 501, 82, 513 ] }, { "category_id": 15, "poly": [ 267.0, 1393.0, 836.0, 1393.0, 836.0, 1425.0, 267.0, 1425.0 ], "score": 0.98, "text": "row), along with their corresponding ground truth", "bbox": [ 96, 501, 300, 513 ] }, { "category_id": 15, "poly": [ 127.0, 1456.0, 199.0, 1450.0, 199.0, 1489.0, 128.0, 1495.0 ], "score": 0.91, "text": "map (", "bbox": [ 45, 524, 71, 536 ] }, { "category_id": 15, "poly": [ 241.0, 1456.0, 309.0, 1450.0, 310.0, 1489.0, 241.0, 1495.0 ], "score": 1.0, "text": "row).", "bbox": [ 86, 524, 111, 536 ] }, { "category_id": 15, "poly": [ 129.0, 1418.0, 245.0, 1421.0, 245.0, 1460.0, 129.0, 1457.0 ], "score": 0.93, "text": " disparity ", "bbox": [ 46, 510, 88, 525 ] }, { "category_id": 15, "poly": [ 290.0, 1418.0, 531.0, 1421.0, 531.0, 1460.0, 290.0, 1457.0 ], "score": 1.0, "text": "row), occlusion map (", "bbox": [ 104, 510, 191, 525 ] }, { "category_id": 15, "poly": [ 159.0, 1888.0, 836.0, 1888.0, 836.0, 1920.0, 159.0, 1920.0 ], "score": 0.99, "text": " Significant improvements in accuracy can be seen in Figure", "bbox": [ 57, 679, 300, 691 ] }, { "category_id": 15, "poly": [ 132.0, 1950.0, 839.0, 1955.0, 838.0, 1987.0, 132.0, 1982.0 ], "score": 1.0, "text": "the effect of noise in the current frame is reduced by increasing", "bbox": [ 47, 702, 301, 715 ] }, { "category_id": 15, "poly": [ 134.0, 1920.0, 480.0, 1920.0, 480.0, 1952.0, 134.0, 1952.0 ], "score": 0.99, "text": "3 when the noise has ranges of", "bbox": [ 48, 691, 172, 702 ] }, { "category_id": 15, "poly": [ 535.0, 1920.0, 590.0, 1920.0, 590.0, 1952.0, 535.0, 1952.0 ], "score": 0.92, "text": " and", "bbox": [ 192, 691, 212, 702 ] }, { "category_id": 15, "poly": [ 645.0, 1920.0, 836.0, 1920.0, 836.0, 1952.0, 645.0, 1952.0 ], "score": 0.96, "text": ". In this scenario,", "bbox": [ 232, 691, 300, 702 ] }, { "category_id": 15, "poly": [ 676.0, 1989.0, 838.0, 1989.0, 838.0, 2019.0, 676.0, 2019.0 ], "score": 0.98, "text": "has the effect", "bbox": [ 243, 716, 301, 726 ] }, { "category_id": 15, "poly": [ 134.0, 1989.0, 409.0, 1989.0, 409.0, 2019.0, 134.0, 2019.0 ], "score": 1.0, "text": "the feedback coefficient", "bbox": [ 48, 716, 147, 726 ] }, { "category_id": 15, "poly": [ 431.0, 1989.0, 654.0, 1989.0, 654.0, 2019.0, 431.0, 2019.0 ], "score": 0.97, "text": ". This increasing of", "bbox": [ 155, 716, 235, 726 ] }, { "category_id": 15, "poly": [ 864.0, 1920.0, 1566.0, 1920.0, 1566.0, 1952.0, 864.0, 1952.0 ], "score": 0.98, "text": "of averaging out noise in the per-pixel costs by selecting", "bbox": [ 311, 691, 563, 702 ] }, { "category_id": 15, "poly": [ 861.0, 1950.0, 1566.0, 1948.0, 1566.0, 1987.0, 862.0, 1989.0 ], "score": 0.98, "text": "matches based more heavily upon the auxiliary cost, which", "bbox": [ 309, 702, 563, 715 ] }, { "category_id": 15, "poly": [ 862.0, 1989.0, 1568.0, 1989.0, 1568.0, 2021.0, 862.0, 2021.0 ], "score": 0.99, "text": "is essentially a much more stable running average of the cost", "bbox": [ 310, 716, 564, 727 ] }, { "category_id": 15, "poly": [ 864.0, 1788.0, 1564.0, 1785.0, 1564.0, 1817.0, 864.0, 1820.0 ], "score": 0.99, "text": "responding to the smallest mean squared error (MSE) of the", "bbox": [ 311, 643, 563, 654 ] }, { "category_id": 15, "poly": [ 864.0, 1822.0, 1427.0, 1822.0, 1427.0, 1854.0, 864.0, 1854.0 ], "score": 0.99, "text": "disparity estimates for a range of noise strengths.", "bbox": [ 311, 655, 513, 667 ] }, { "category_id": 15, "poly": [ 862.0, 1748.0, 1488.0, 1753.0, 1488.0, 1785.0, 861.0, 1781.0 ], "score": 0.99, "text": "Figure 4: Optimal values of the feedback coefficient ", "bbox": [ 310, 629, 535, 642 ] }, { "category_id": 15, "poly": [ 1511.0, 1748.0, 1561.0, 1753.0, 1561.0, 1785.0, 1511.0, 1781.0 ], "score": 0.96, "text": "cor-", "bbox": [ 543, 629, 561, 642 ] }, { "category_id": 15, "poly": [ 864.0, 866.0, 1566.0, 866.0, 1566.0, 898.0, 864.0, 898.0 ], "score": 0.99, "text": "Figure 3: Performance of temporal matching at different levels", "bbox": [ 311, 311, 563, 323 ] }, { "category_id": 15, "poly": [ 864.0, 935.0, 1566.0, 933.0, 1566.0, 965.0, 864.0, 967.0 ], "score": 0.98, "text": "squared error (MSE) of disparities is plotted versus the values", "bbox": [ 311, 336, 563, 347 ] }, { "category_id": 15, "poly": [ 864.0, 1001.0, 1492.0, 1001.0, 1492.0, 1031.0, 864.0, 1031.0 ], "score": 0.99, "text": "values of MSE obtained without temporal aggregation.", "bbox": [ 311, 360, 537, 371 ] }, { "category_id": 15, "poly": [ 864.0, 901.0, 1294.0, 901.0, 1294.0, 933.0, 864.0, 933.0 ], "score": 0.99, "text": "of uniformly distributed image noise", "bbox": [ 311, 324, 465, 335 ] }, { "category_id": 15, "poly": [ 1484.0, 901.0, 1568.0, 901.0, 1568.0, 933.0, 1484.0, 933.0 ], "score": 0.99, "text": ".Mean", "bbox": [ 534, 324, 564, 335 ] }, { "category_id": 15, "poly": [ 864.0, 967.0, 1175.0, 967.0, 1175.0, 999.0, 864.0, 999.0 ], "score": 0.99, "text": "of the feedback coefficient", "bbox": [ 311, 348, 423, 359 ] }, { "category_id": 15, "poly": [ 1196.0, 967.0, 1568.0, 967.0, 1568.0, 999.0, 1196.0, 999.0 ], "score": 0.99, "text": ". Dashed lines correspond to the", "bbox": [ 430, 348, 564, 359 ] }, { "category_id": 15, "poly": [ 857.0, 1061.0, 1566.0, 1068.0, 1566.0, 1107.0, 857.0, 1100.0 ], "score": 0.99, "text": " Table I: Parameters used in the evaluation of real-time tempo-", "bbox": [ 308, 381, 563, 398 ] }, { "category_id": 15, "poly": [ 859.0, 1102.0, 1093.0, 1105.0, 1092.0, 1137.0, 859.0, 1134.0 ], "score": 1.0, "text": "ral stereo matching.", "bbox": [ 309, 396, 393, 409 ] }, { "category_id": 15, "poly": [ 1178.0, 151.0, 1282.0, 151.0, 1282.0, 186.0, 1178.0, 186.0 ], "score": 1.0, "text": "Noise: ±0", "bbox": [ 424, 54, 461, 66 ] }, { "category_id": 15, "poly": [ 1178.0, 151.0, 1282.0, 151.0, 1282.0, 186.0, 1178.0, 186.0 ], "score": 1.0, "text": "Noise: ±0", "bbox": [ 424, 54, 461, 66 ] } ], "page_info": { "page_no": 3, "height": 2200, "width": 1700 } }, { "layout_dets": [ { "category_id": 5, "poly": [ 880.81298828125, 613.750244140625, 1552.5638427734375, 613.750244140625, 1552.5638427734375, 855.9174194335938, 880.81298828125, 855.9174194335938 ], "score": 0.9999957084655762, "bbox": [ 317, 220, 558, 308 ] }, { "category_id": 1, "poly": [ 862.7925415039062, 158.05548095703125, 1569.6671142578125, 158.05548095703125, 1569.6671142578125, 456.6153869628906, 862.7925415039062, 456.6153869628906 ], "score": 0.9999922513961792, "bbox": [ 310, 56, 565, 164 ] }, { "category_id": 1, "poly": [ 864.6585083007812, 1061.7374267578125, 1570.4825439453125, 1061.7374267578125, 1570.4825439453125, 1459.7132568359375, 864.6585083007812, 1459.7132568359375 ], "score": 0.9999921321868896, "bbox": [ 311, 382, 565, 525 ] }, { "category_id": 1, "poly": [ 130.64285278320312, 1519.7022705078125, 836.2221069335938, 1519.7022705078125, 836.2221069335938, 1882.68359375, 130.64285278320312, 1882.68359375 ], "score": 0.9999898672103882, "bbox": [ 47, 547, 301, 677 ] }, { "category_id": 1, "poly": [ 133.1135711669922, 158.4307861328125, 837.9683837890625, 158.4307861328125, 837.9683837890625, 323.343017578125, 133.1135711669922, 323.343017578125 ], "score": 0.9999892115592957, "bbox": [ 47, 57, 301, 116 ] }, { "category_id": 4, "poly": [ 132.3511199951172, 1347.8763427734375, 839.7514038085938, 1347.8763427734375, 839.7514038085938, 1476.9757080078125, 132.3511199951172, 1476.9757080078125 ], "score": 0.9999880790710449, "bbox": [ 47, 485, 302, 531 ] }, { "category_id": 7, "poly": [ 887.6280517578125, 860.9362182617188, 1551.5972900390625, 860.9362182617188, 1551.5972900390625, 964.0142211914062, 887.6280517578125, 964.0142211914062 ], "score": 0.9999836683273315, "bbox": [ 319, 309, 558, 347 ] }, { "category_id": 1, "poly": [ 869.9986572265625, 1514.7762451171875, 1571.624755859375, 1514.7762451171875, 1571.624755859375, 2022.618896484375, 869.9986572265625, 2022.618896484375 ], "score": 0.9999811053276062, "bbox": [ 313, 545, 565, 728 ] }, { "category_id": 3, "poly": [ 164.82151794433594, 352.74810791015625, 805.8219604492188, 352.74810791015625, 805.8219604492188, 1320.43310546875, 164.82151794433594, 1320.43310546875 ], "score": 0.9999799728393555, "bbox": [ 59, 126, 290, 475 ] }, { "category_id": 0, "poly": [ 1137.668701171875, 1477.0120849609375, 1293.498046875, 1477.0120849609375, 1293.498046875, 1502.5439453125, 1137.668701171875, 1502.5439453125 ], "score": 0.9999679327011108, "bbox": [ 409, 531, 465, 540 ] }, { "category_id": 1, "poly": [ 133.0285186767578, 1886.7501220703125, 837.0147705078125, 1886.7501220703125, 837.0147705078125, 2018.0294189453125, 133.0285186767578, 2018.0294189453125 ], "score": 0.9999630451202393, "bbox": [ 47, 679, 301, 726 ] }, { "category_id": 0, "poly": [ 1114.8399658203125, 1022.4933471679688, 1317.0313720703125, 1022.4933471679688, 1317.0313720703125, 1052.679931640625, 1114.8399658203125, 1052.679931640625 ], "score": 0.9999338984489441, "bbox": [ 401, 368, 474, 378 ] }, { "category_id": 1, "poly": [ 862.0576171875, 480.8196105957031, 1565.8367919921875, 480.8196105957031, 1565.8367919921875, 577.5508422851562, 862.0576171875, 577.5508422851562 ], "score": 0.8958550691604614, "bbox": [ 310, 173, 563, 207 ] }, { "category_id": 13, "poly": [ 736, 1445, 827, 1445, 827, 1475, 736, 1475 ], "score": 0.9, "latex": "\\lambda=0.8", "bbox": [ 264, 520, 297, 531 ] }, { "category_id": 13, "poly": [ 1003, 887, 1105, 887, 1105, 911, 1003, 911 ], "score": 0.89, "latex": "320\\times240", "bbox": [ 361, 319, 397, 327 ] }, { "category_id": 13, "poly": [ 338, 1446, 391, 1446, 391, 1475, 338, 1475 ], "score": 0.87, "latex": "\\pm30", "bbox": [ 121, 520, 140, 531 ] }, { "category_id": 13, "poly": [ 166, 1619, 219, 1619, 219, 1649, 166, 1649 ], "score": 0.85, "latex": "\\pm40", "bbox": [ 59, 582, 78, 593 ] }, { "category_id": 13, "poly": [ 301, 196, 329, 196, 329, 224, 301, 224 ], "score": 0.84, "latex": "\\gamma_{t}", "bbox": [ 108, 70, 118, 80 ] }, { "category_id": 13, "poly": [ 795, 1586, 836, 1586, 836, 1616, 795, 1616 ], "score": 0.84, "latex": "\\pm0", "bbox": [ 286, 570, 300, 581 ] }, { "category_id": 13, "poly": [ 1037, 939, 1059, 939, 1059, 960, 1037, 960 ], "score": 0.83, "latex": "\\%", "bbox": [ 373, 338, 381, 345 ] }, { "category_id": 13, "poly": [ 462, 1586, 482, 1586, 482, 1613, 462, 1613 ], "score": 0.78, "latex": "\\lambda", "bbox": [ 166, 570, 173, 580 ] }, { "category_id": 15, "poly": [ 862.0, 160.0, 1571.0, 160.0, 1571.0, 192.0, 862.0, 192.0 ], "score": 0.98, "text": "the proposed implementation achieves the highest speed of", "bbox": [ 310, 57, 565, 69 ] }, { "category_id": 15, "poly": [ 864.0, 195.0, 1566.0, 195.0, 1566.0, 227.0, 864.0, 227.0 ], "score": 0.99, "text": "operation measured by the number of disparity hypotheses", "bbox": [ 311, 70, 563, 81 ] }, { "category_id": 15, "poly": [ 864.0, 227.0, 1568.0, 227.0, 1568.0, 259.0, 864.0, 259.0 ], "score": 0.99, "text": "evaluated per second, as shown in Table I1. It is also the second", "bbox": [ 311, 81, 564, 93 ] }, { "category_id": 15, "poly": [ 862.0, 261.0, 1568.0, 261.0, 1568.0, 293.0, 862.0, 293.0 ], "score": 0.99, "text": "most accurate real-time method in terms of error rate, as", "bbox": [ 310, 93, 564, 105 ] }, { "category_id": 15, "poly": [ 864.0, 296.0, 1564.0, 296.0, 1564.0, 325.0, 864.0, 325.0 ], "score": 1.0, "text": "measured using the Middlebury stereo evaluation benchmark.", "bbox": [ 311, 106, 563, 117 ] }, { "category_id": 15, "poly": [ 859.0, 323.0, 1568.0, 325.0, 1568.0, 358.0, 859.0, 355.0 ], "score": 0.98, "text": " It should be noted that it is difficult to establish an unbiased", "bbox": [ 309, 116, 564, 128 ] }, { "category_id": 15, "poly": [ 862.0, 358.0, 1566.0, 358.0, 1566.0, 390.0, 862.0, 390.0 ], "score": 1.0, "text": "metric for speed comparisons, as the architecture, number of", "bbox": [ 310, 128, 563, 140 ] }, { "category_id": 15, "poly": [ 866.0, 394.0, 1568.0, 394.0, 1568.0, 426.0, 866.0, 426.0 ], "score": 0.98, "text": "cores, and clock speed of graphics hardware used are not", "bbox": [ 311, 141, 564, 153 ] }, { "category_id": 15, "poly": [ 862.0, 424.0, 1259.0, 429.0, 1259.0, 461.0, 861.0, 456.0 ], "score": 0.99, "text": "consistent across implementations.", "bbox": [ 310, 152, 453, 165 ] }, { "category_id": 15, "poly": [ 889.0, 1061.0, 1571.0, 1061.0, 1571.0, 1100.0, 889.0, 1100.0 ], "score": 1.0, "text": "While the majority of stereo matching algorithms focus", "bbox": [ 320, 381, 565, 396 ] }, { "category_id": 15, "poly": [ 859.0, 1093.0, 1571.0, 1095.0, 1571.0, 1134.0, 859.0, 1132.0 ], "score": 0.99, "text": " on achieving high accuracy on still images, the volume of", "bbox": [ 309, 393, 565, 408 ] }, { "category_id": 15, "poly": [ 862.0, 1130.0, 1564.0, 1130.0, 1564.0, 1162.0, 862.0, 1162.0 ], "score": 0.99, "text": "research aimed at recovery of temporally consistent disparity", "bbox": [ 310, 406, 563, 418 ] }, { "category_id": 15, "poly": [ 862.0, 1162.0, 1568.0, 1162.0, 1568.0, 1201.0, 862.0, 1201.0 ], "score": 0.99, "text": "maps remains disproportionally small. This paper introduces", "bbox": [ 310, 418, 564, 432 ] }, { "category_id": 15, "poly": [ 862.0, 1196.0, 1568.0, 1196.0, 1568.0, 1235.0, 862.0, 1235.0 ], "score": 0.98, "text": "an efficient temporal cost aggregation scheme that can easily", "bbox": [ 310, 430, 564, 444 ] }, { "category_id": 15, "poly": [ 859.0, 1226.0, 1571.0, 1228.0, 1571.0, 1267.0, 859.0, 1265.0 ], "score": 0.99, "text": "be combined with conventional spatial cost aggregation to", "bbox": [ 309, 441, 565, 456 ] }, { "category_id": 15, "poly": [ 864.0, 1265.0, 1568.0, 1265.0, 1568.0, 1297.0, 864.0, 1297.0 ], "score": 1.0, "text": "improve the accuracy of stereo matching when operating on", "bbox": [ 311, 455, 564, 466 ] }, { "category_id": 15, "poly": [ 864.0, 1297.0, 1568.0, 1297.0, 1568.0, 1329.0, 864.0, 1329.0 ], "score": 0.99, "text": "video sequences. A synthetic video sequence, along with", "bbox": [ 311, 466, 564, 478 ] }, { "category_id": 15, "poly": [ 864.0, 1331.0, 1568.0, 1331.0, 1568.0, 1364.0, 864.0, 1364.0 ], "score": 0.99, "text": "ground truth disparity data, was generated to evaluate the", "bbox": [ 311, 479, 564, 491 ] }, { "category_id": 15, "poly": [ 862.0, 1361.0, 1571.0, 1361.0, 1571.0, 1400.0, 862.0, 1400.0 ], "score": 0.98, "text": "performance of the proposed method. It was shown that", "bbox": [ 310, 489, 565, 504 ] }, { "category_id": 15, "poly": [ 864.0, 1398.0, 1571.0, 1398.0, 1571.0, 1430.0, 864.0, 1430.0 ], "score": 0.98, "text": "temporal aggregation is significantly more robust to noise than", "bbox": [ 311, 503, 565, 514 ] }, { "category_id": 15, "poly": [ 862.0, 1430.0, 1497.0, 1430.0, 1497.0, 1462.0, 862.0, 1462.0 ], "score": 0.99, "text": "a method that only considers the current stereo frames.", "bbox": [ 310, 514, 538, 526 ] }, { "category_id": 15, "poly": [ 157.0, 1517.0, 838.0, 1517.0, 838.0, 1556.0, 157.0, 1556.0 ], "score": 0.99, "text": "The optimal value of the feedback coefficient is largely", "bbox": [ 56, 546, 301, 560 ] }, { "category_id": 15, "poly": [ 134.0, 1554.0, 836.0, 1554.0, 836.0, 1584.0, 134.0, 1584.0 ], "score": 0.97, "text": "dependent on the noise being added to the image. Figure 4", "bbox": [ 48, 559, 300, 570 ] }, { "category_id": 15, "poly": [ 132.0, 1655.0, 838.0, 1655.0, 838.0, 1684.0, 132.0, 1684.0 ], "score": 0.99, "text": "rely on the auxiliary cost when noise is high and it is more", "bbox": [ 47, 595, 301, 606 ] }, { "category_id": 15, "poly": [ 132.0, 1684.0, 839.0, 1689.0, 838.0, 1721.0, 132.0, 1716.0 ], "score": 0.98, "text": "beneficial to rely on the current cost when noise is low. Figure", "bbox": [ 47, 606, 301, 619 ] }, { "category_id": 15, "poly": [ 132.0, 1719.0, 839.0, 1723.0, 838.0, 1755.0, 132.0, 1751.0 ], "score": 1.0, "text": "5 illustrates the improvements that are achieved when applying", "bbox": [ 47, 618, 301, 631 ] }, { "category_id": 15, "poly": [ 134.0, 1755.0, 836.0, 1755.0, 836.0, 1785.0, 134.0, 1785.0 ], "score": 0.98, "text": "temporal stereo matching to a particular pair of frames in the", "bbox": [ 48, 631, 300, 642 ] }, { "category_id": 15, "poly": [ 134.0, 1788.0, 834.0, 1788.0, 834.0, 1820.0, 134.0, 1820.0 ], "score": 1.0, "text": "synthetic video sequence. Clearly, the noise in the disparity", "bbox": [ 48, 643, 300, 655 ] }, { "category_id": 15, "poly": [ 134.0, 1822.0, 836.0, 1822.0, 836.0, 1854.0, 134.0, 1854.0 ], "score": 0.99, "text": "map is drastically reduced when temporal stereo matching is", "bbox": [ 48, 655, 300, 667 ] }, { "category_id": 15, "poly": [ 132.0, 1856.0, 196.0, 1856.0, 196.0, 1886.0, 132.0, 1886.0 ], "score": 1.0, "text": "used.", "bbox": [ 47, 668, 70, 678 ] }, { "category_id": 15, "poly": [ 132.0, 1620.0, 165.0, 1620.0, 165.0, 1652.0, 132.0, 1652.0 ], "score": 0.99, "text": "to", "bbox": [ 47, 583, 59, 594 ] }, { "category_id": 15, "poly": [ 220.0, 1620.0, 838.0, 1620.0, 838.0, 1652.0, 220.0, 1652.0 ], "score": 0.98, "text": ". As intuition would suggest, it is more beneficial to", "bbox": [ 79, 583, 301, 594 ] }, { "category_id": 15, "poly": [ 127.0, 1584.0, 461.0, 1581.0, 461.0, 1620.0, 127.0, 1623.0 ], "score": 0.96, "text": " shows the optimal values of", "bbox": [ 45, 570, 165, 583 ] }, { "category_id": 15, "poly": [ 483.0, 1584.0, 794.0, 1581.0, 794.0, 1620.0, 483.0, 1623.0 ], "score": 0.99, "text": "for noise ranging between", "bbox": [ 173, 570, 285, 583 ] }, { "category_id": 15, "poly": [ 134.0, 160.0, 836.0, 160.0, 836.0, 192.0, 134.0, 192.0 ], "score": 0.99, "text": "over the most recent frames. By maintaining a reasonably", "bbox": [ 48, 57, 300, 69 ] }, { "category_id": 15, "poly": [ 134.0, 229.0, 836.0, 229.0, 836.0, 261.0, 134.0, 261.0 ], "score": 0.98, "text": "edges, essentially reducing over-smoothing of a pixel's dis-", "bbox": [ 48, 82, 300, 93 ] }, { "category_id": 15, "poly": [ 132.0, 261.0, 838.0, 261.0, 838.0, 293.0, 132.0, 293.0 ], "score": 0.99, "text": "parity when a pixel transitions from one depth to another in", "bbox": [ 47, 93, 301, 105 ] }, { "category_id": 15, "poly": [ 130.0, 293.0, 354.0, 296.0, 353.0, 328.0, 129.0, 325.0 ], "score": 1.0, "text": "subsequent frames.", "bbox": [ 46, 105, 127, 118 ] }, { "category_id": 15, "poly": [ 134.0, 192.0, 300.0, 192.0, 300.0, 225.0, 134.0, 225.0 ], "score": 0.93, "text": "high value of", "bbox": [ 48, 69, 108, 81 ] }, { "category_id": 15, "poly": [ 330.0, 192.0, 836.0, 192.0, 836.0, 225.0, 330.0, 225.0 ], "score": 0.99, "text": ", the auxiliary cost also preserves temporal", "bbox": [ 118, 69, 300, 81 ] }, { "category_id": 15, "poly": [ 132.0, 1345.0, 836.0, 1348.0, 836.0, 1382.0, 132.0, 1380.0 ], "score": 1.0, "text": "Figure 5: A comparison of stereo matching without temporal", "bbox": [ 47, 484, 300, 497 ] }, { "category_id": 15, "poly": [ 132.0, 1382.0, 834.0, 1382.0, 834.0, 1414.0, 132.0, 1414.0 ], "score": 0.98, "text": "cost aggregation (top) and with temporal cost aggregation", "bbox": [ 47, 497, 300, 509 ] }, { "category_id": 15, "poly": [ 134.0, 1416.0, 836.0, 1416.0, 836.0, 1446.0, 134.0, 1446.0 ], "score": 0.98, "text": "(bottom) for a single frame in the synthetic video sequence", "bbox": [ 48, 509, 300, 520 ] }, { "category_id": 15, "poly": [ 134.0, 1448.0, 337.0, 1446.0, 337.0, 1478.0, 134.0, 1480.0 ], "score": 0.98, "text": "where the noise is", "bbox": [ 48, 521, 121, 532 ] }, { "category_id": 15, "poly": [ 392.0, 1448.0, 735.0, 1446.0, 735.0, 1478.0, 392.0, 1480.0 ], "score": 0.99, "text": "and the feedback coefficient is", "bbox": [ 141, 521, 264, 532 ] }, { "category_id": 15, "poly": [ 896.0, 855.0, 1324.0, 857.0, 1323.0, 896.0, 896.0, 894.0 ], "score": 0.95, "text": "1I Millions of Disparity Estimates per Second.", "bbox": [ 322, 307, 476, 322 ] }, { "category_id": 15, "poly": [ 903.0, 912.0, 1550.0, 912.0, 1550.0, 944.0, 903.0, 944.0 ], "score": 0.99, "text": "3 As measured by the Middlebury stereo performance benchmark using", "bbox": [ 325, 328, 558, 339 ] }, { "category_id": 15, "poly": [ 901.0, 887.0, 1002.0, 887.0, 1002.0, 919.0, 901.0, 919.0 ], "score": 0.99, "text": "2Assumes", "bbox": [ 324, 319, 360, 330 ] }, { "category_id": 15, "poly": [ 1106.0, 887.0, 1404.0, 887.0, 1404.0, 919.0, 1106.0, 919.0 ], "score": 0.98, "text": "images with 32 disparity levels.", "bbox": [ 398, 319, 505, 330 ] }, { "category_id": 15, "poly": [ 915.0, 937.0, 1036.0, 937.0, 1036.0, 969.0, 915.0, 969.0 ], "score": 0.96, "text": "the avgerage", "bbox": [ 329, 337, 372, 348 ] }, { "category_id": 15, "poly": [ 1060.0, 937.0, 1192.0, 937.0, 1192.0, 969.0, 1060.0, 969.0 ], "score": 0.96, "text": "of bad pixels.", "bbox": [ 381, 337, 429, 348 ] }, { "category_id": 15, "poly": [ 873.0, 1515.0, 1571.0, 1515.0, 1571.0, 1545.0, 873.0, 1545.0 ], "score": 0.97, "text": "[1] D. Scharstein and R. Szeliski, “A taxonomy and evaluation of dense ", "bbox": [ 314, 545, 565, 556 ] }, { "category_id": 15, "poly": [ 915.0, 1542.0, 1573.0, 1542.0, 1573.0, 1572.0, 915.0, 1572.0 ], "score": 0.98, "text": "two-frame stereo correspondence algorithms”’ International Journal of", "bbox": [ 329, 555, 566, 565 ] }, { "category_id": 15, "poly": [ 915.0, 1565.0, 1409.0, 1565.0, 1409.0, 1597.0, 915.0, 1597.0 ], "score": 0.98, "text": "Computer Vision, vol. 47, pp. 7-42, April-June 2002.", "bbox": [ 329, 563, 507, 574 ] }, { "category_id": 15, "poly": [ 871.0, 1588.0, 1568.0, 1590.0, 1568.0, 1623.0, 871.0, 1620.0 ], "score": 0.98, "text": "[2] D. Scharstein and R. Szeliski, “High-accuracy stereo depth maps using", "bbox": [ 313, 571, 564, 584 ] }, { "category_id": 15, "poly": [ 915.0, 1616.0, 1568.0, 1616.0, 1568.0, 1648.0, 915.0, 1648.0 ], "score": 0.97, "text": "structured light,” in In IEEE Computer Society Conference on Computer", "bbox": [ 329, 581, 564, 593 ] }, { "category_id": 15, "poly": [ 915.0, 1641.0, 1508.0, 1641.0, 1508.0, 1673.0, 915.0, 1673.0 ], "score": 0.98, "text": "Vision and Pattern Recognition, vol. 1, pp. 195-202, June 2003.", "bbox": [ 329, 590, 542, 602 ] }, { "category_id": 15, "poly": [ 873.0, 1666.0, 1568.0, 1666.0, 1568.0, 1696.0, 873.0, 1696.0 ], "score": 0.99, "text": "[3] J. Kowalczuk, E. Psota, and L. Perez, “Real-time stereo matching on", "bbox": [ 314, 599, 564, 610 ] }, { "category_id": 15, "poly": [ 912.0, 1689.0, 1571.0, 1689.0, 1571.0, 1721.0, 912.0, 1721.0 ], "score": 0.98, "text": " CUDA using an iterative refinement method for adaptive support-weight", "bbox": [ 328, 608, 565, 619 ] }, { "category_id": 15, "poly": [ 915.0, 1714.0, 1571.0, 1714.0, 1571.0, 1746.0, 915.0, 1746.0 ], "score": 0.99, "text": "correspondences,” Circuits and Systems for Video Technology, IEEE", "bbox": [ 329, 617, 565, 628 ] }, { "category_id": 15, "poly": [ 908.0, 1737.0, 1374.0, 1735.0, 1374.0, 1774.0, 908.0, 1776.0 ], "score": 0.96, "text": "Transactions on, vol. 23, Ppp. 94 -104, Jan. 2013.", "bbox": [ 326, 625, 494, 638 ] }, { "category_id": 15, "poly": [ 873.0, 1765.0, 1568.0, 1765.0, 1568.0, 1797.0, 873.0, 1797.0 ], "score": 0.99, "text": "[4] K.-J. Yoon and I.-S. Kweon, Locally adaptive support-weight approach", "bbox": [ 314, 635, 564, 646 ] }, { "category_id": 15, "poly": [ 912.0, 1790.0, 1571.0, 1790.0, 1571.0, 1822.0, 912.0, 1822.0 ], "score": 0.97, "text": "for visual correspondence search,' in CVPR'05: Proceedings of the 2005", "bbox": [ 328, 644, 565, 655 ] }, { "category_id": 15, "poly": [ 915.0, 1815.0, 1571.0, 1815.0, 1571.0, 1847.0, 915.0, 1847.0 ], "score": 0.96, "text": "IEEE Computer Society Conference on ComputerVision andPattern", "bbox": [ 329, 653, 565, 664 ] }, { "category_id": 15, "poly": [ 915.0, 1840.0, 1568.0, 1840.0, 1568.0, 1872.0, 915.0, 1872.0 ], "score": 0.97, "text": "Recognition (CVPR'05) - Volume 2, (Washington, DC, USA), Pp. 924-", "bbox": [ 329, 662, 564, 673 ] }, { "category_id": 15, "poly": [ 912.0, 1863.0, 1247.0, 1863.0, 1247.0, 1895.0, 912.0, 1895.0 ], "score": 0.98, "text": "931, IEEE Computer Society, 2005.", "bbox": [ 328, 670, 448, 682 ] }, { "category_id": 15, "poly": [ 873.0, 1891.0, 1568.0, 1891.0, 1568.0, 1923.0, 873.0, 1923.0 ], "score": 0.97, "text": "[5] L. Wang, M. Liao, M. Gong, R. Yang, and D. Nister, “High-quality real-", "bbox": [ 314, 680, 564, 692 ] }, { "category_id": 15, "poly": [ 912.0, 1916.0, 1566.0, 1916.0, 1566.0, 1946.0, 912.0, 1946.0 ], "score": 0.99, "text": "time stereo using adaptive cost aggregation and dynamic programming,\"", "bbox": [ 328, 689, 563, 700 ] }, { "category_id": 15, "poly": [ 910.0, 1936.0, 1568.0, 1939.0, 1568.0, 1971.0, 910.0, 1969.0 ], "score": 0.94, "text": "in 3DPVT'06:Proceedings of the Third International Symposium", "bbox": [ 327, 696, 564, 709 ] }, { "category_id": 15, "poly": [ 915.0, 1964.0, 1568.0, 1964.0, 1568.0, 1996.0, 915.0, 1996.0 ], "score": 0.98, "text": "on 3D Data Processing, Visualization, and Transmission (3DPVT'06),", "bbox": [ 329, 707, 564, 718 ] }, { "category_id": 15, "poly": [ 915.0, 1989.0, 1564.0, 1989.0, 1564.0, 2021.0, 915.0, 2021.0 ], "score": 1.0, "text": "(Washington, DC, USA), Pp. 798-805, IEEE Computer Society, 2006.", "bbox": [ 329, 716, 563, 727 ] }, { "category_id": 15, "poly": [ 1134.0, 1471.0, 1296.0, 1471.0, 1296.0, 1510.0, 1134.0, 1510.0 ], "score": 1.0, "text": "REFERENCES", "bbox": [ 408, 529, 466, 543 ] }, { "category_id": 15, "poly": [ 159.0, 1888.0, 836.0, 1888.0, 836.0, 1920.0, 159.0, 1920.0 ], "score": 0.99, "text": "The algorithm was implement using NVIDIA's Compute", "bbox": [ 57, 679, 300, 691 ] }, { "category_id": 15, "poly": [ 134.0, 1920.0, 834.0, 1920.0, 834.0, 1950.0, 134.0, 1950.0 ], "score": 0.98, "text": "Unified Device Architecture (CUDA). The details of the im-", "bbox": [ 48, 691, 300, 702 ] }, { "category_id": 15, "poly": [ 129.0, 1948.0, 841.0, 1950.0, 841.0, 1989.0, 129.0, 1987.0 ], "score": 0.98, "text": " plementation are similar to those given in [3]. When compared ", "bbox": [ 46, 701, 302, 716 ] }, { "category_id": 15, "poly": [ 132.0, 1989.0, 836.0, 1989.0, 836.0, 2021.0, 132.0, 2021.0 ], "score": 0.99, "text": "to other existing real-time stereo matching implementations,", "bbox": [ 47, 716, 300, 727 ] }, { "category_id": 15, "poly": [ 1111.0, 1022.0, 1317.0, 1022.0, 1317.0, 1061.0, 1111.0, 1061.0 ], "score": 1.0, "text": "V. CONCLUSION", "bbox": [ 399, 367, 474, 381 ] }, { "category_id": 15, "poly": [ 864.0, 484.0, 1564.0, 484.0, 1564.0, 516.0, 864.0, 516.0 ], "score": 0.99, "text": "Table II: A comparison of speed and accuracy for the imple-", "bbox": [ 311, 174, 563, 185 ] }, { "category_id": 15, "poly": [ 864.0, 518.0, 1564.0, 518.0, 1564.0, 550.0, 864.0, 550.0 ], "score": 0.99, "text": "mentations of many leading real-time stereo matching meth-", "bbox": [ 311, 186, 563, 198 ] }, { "category_id": 15, "poly": [ 862.0, 550.0, 917.0, 550.0, 917.0, 584.0, 862.0, 584.0 ], "score": 0.96, "text": "ods.", "bbox": [ 310, 198, 330, 210 ] }, { "category_id": 15, "poly": [ 864.0, 484.0, 1564.0, 484.0, 1564.0, 516.0, 864.0, 516.0 ], "score": 0.99, "text": "Table II: A comparison of speed and accuracy for the imple-", "bbox": [ 311, 174, 563, 185 ] }, { "category_id": 15, "poly": [ 864.0, 518.0, 1564.0, 518.0, 1564.0, 550.0, 864.0, 550.0 ], "score": 0.99, "text": "mentations of many leading real-time stereo matching meth-", "bbox": [ 311, 186, 563, 198 ] }, { "category_id": 15, "poly": [ 862.0, 550.0, 917.0, 550.0, 917.0, 584.0, 862.0, 584.0 ], "score": 0.96, "text": "ods.", "bbox": [ 310, 198, 330, 210 ] } ], "page_info": { "page_no": 4, "height": 2200, "width": 1700 } }, { "layout_dets": [ { "category_id": 1, "poly": [ 134.58497619628906, 157.681884765625, 841.3460693359375, 157.681884765625, 841.3460693359375, 1666.27001953125, 134.58497619628906, 1666.27001953125 ], "score": 0.9999936819076538, "bbox": [ 48, 56, 302, 599 ] }, { "category_id": 15, "poly": [ 143.0, 163.0, 838.0, 163.0, 838.0, 192.0, 143.0, 192.0 ], "score": 0.97, "text": "[6] W. Yu, T. Chen, F. Franchetti, and J. C. Hoe, “High performance stereo", "bbox": [ 51, 58, 301, 69 ] }, { "category_id": 15, "poly": [ 182.0, 188.0, 838.0, 188.0, 838.0, 218.0, 182.0, 218.0 ], "score": 0.98, "text": "vision designed for massively data parallel platforms,’ Circuits and", "bbox": [ 65, 67, 301, 78 ] }, { "category_id": 15, "poly": [ 182.0, 213.0, 841.0, 213.0, 841.0, 245.0, 182.0, 245.0 ], "score": 0.98, "text": "Systems for Video Technology, IEEE Transactions on, vol. 20, pp. 1509", "bbox": [ 65, 76, 302, 88 ] }, { "category_id": 15, "poly": [ 182.0, 238.0, 411.0, 238.0, 411.0, 268.0, 182.0, 268.0 ], "score": 0.98, "text": "-1519, November 2010.", "bbox": [ 65, 85, 147, 96 ] }, { "category_id": 15, "poly": [ 143.0, 264.0, 838.0, 264.0, 838.0, 293.0, 143.0, 293.0 ], "score": 0.99, "text": "[7] S. Mattoccia, M. Viti, and F. Ries, “Near real-time fast bilateral stereo", "bbox": [ 51, 95, 301, 105 ] }, { "category_id": 15, "poly": [ 182.0, 289.0, 838.0, 289.0, 838.0, 319.0, 182.0, 319.0 ], "score": 0.96, "text": "on the GPU in Computer Vision and Pattern Recognition Workshops", "bbox": [ 65, 104, 301, 114 ] }, { "category_id": 15, "poly": [ 178.0, 307.0, 841.0, 309.0, 841.0, 348.0, 178.0, 346.0 ], "score": 0.95, "text": "(CVPRW), 2011 IEEE Computer Society Conference on,Ppp. 136 -143,", "bbox": [ 64, 110, 302, 125 ] }, { "category_id": 15, "poly": [ 185.0, 339.0, 289.0, 339.0, 289.0, 364.0, 185.0, 364.0 ], "score": 0.98, "text": "June 2011.", "bbox": [ 66, 122, 104, 131 ] }, { "category_id": 15, "poly": [ 141.0, 362.0, 838.0, 362.0, 838.0, 392.0, 141.0, 392.0 ], "score": 0.98, "text": "[8] K. Zhang, J. Lu, Q. Yang, G. Lafruit, R. Lauwereins, and L. Van Gool,", "bbox": [ 50, 130, 301, 141 ] }, { "category_id": 15, "poly": [ 182.0, 387.0, 838.0, 387.0, 838.0, 419.0, 182.0, 419.0 ], "score": 0.98, "text": "\"Real-time and accurate stereo: A scalable approach with bitwise fast", "bbox": [ 65, 139, 301, 150 ] }, { "category_id": 15, "poly": [ 185.0, 412.0, 838.0, 412.0, 838.0, 445.0, 185.0, 445.0 ], "score": 0.97, "text": "voting on CUDA,” Circuits and Systems for Video Technology, IEEE", "bbox": [ 66, 148, 301, 160 ] }, { "category_id": 15, "poly": [ 182.0, 438.0, 656.0, 438.0, 656.0, 468.0, 182.0, 468.0 ], "score": 0.99, "text": "Transactions on, vol. 21, pp. 867 -878, July 2011.", "bbox": [ 65, 157, 236, 168 ] }, { "category_id": 15, "poly": [ 141.0, 463.0, 838.0, 463.0, 838.0, 493.0, 141.0, 493.0 ], "score": 0.96, "text": "[9] C. Rhemann, A. Hosni, M. Bleyer, C. Rother, and M. Gelautz, “Fast cost-", "bbox": [ 50, 166, 301, 177 ] }, { "category_id": 15, "poly": [ 182.0, 488.0, 838.0, 488.0, 838.0, 518.0, 182.0, 518.0 ], "score": 0.98, "text": "volume filtering for visual correspondence and beyond,\" in Computer", "bbox": [ 65, 175, 301, 186 ] }, { "category_id": 15, "poly": [ 180.0, 509.0, 841.0, 511.0, 841.0, 543.0, 180.0, 541.0 ], "score": 0.95, "text": "Vision and Pattern Recognition (CVPR), 20ll IEEE Conference on,", "bbox": [ 64, 183, 302, 195 ] }, { "category_id": 15, "poly": [ 180.0, 536.0, 448.0, 534.0, 448.0, 566.0, 180.0, 568.0 ], "score": 0.99, "text": "Pp. 3017 -3024, June 2011.", "bbox": [ 64, 192, 161, 203 ] }, { "category_id": 15, "poly": [ 134.0, 561.0, 838.0, 561.0, 838.0, 591.0, 134.0, 591.0 ], "score": 0.99, "text": "[10] A. Hosni, C. Rhemann, M. Bleyer, and M. Gelautz, “Temporally con-", "bbox": [ 48, 201, 301, 212 ] }, { "category_id": 15, "poly": [ 180.0, 587.0, 836.0, 587.0, 836.0, 616.0, 180.0, 616.0 ], "score": 0.99, "text": " sistent disparity and optical flow via efficient spatio-temporal filtering,\"", "bbox": [ 64, 211, 300, 221 ] }, { "category_id": 15, "poly": [ 182.0, 612.0, 838.0, 612.0, 838.0, 642.0, 182.0, 642.0 ], "score": 0.97, "text": "in Advances in Image and Video Technology (Y.-S. Ho, ed.), vol. 7087", "bbox": [ 65, 220, 301, 231 ] }, { "category_id": 15, "poly": [ 180.0, 632.0, 845.0, 632.0, 845.0, 671.0, 180.0, 671.0 ], "score": 0.88, "text": "of Lectureotes inComputer Science,pp.16517,Springererlin /", "bbox": [ 64, 227, 304, 241 ] }, { "category_id": 15, "poly": [ 182.0, 660.0, 353.0, 660.0, 353.0, 692.0, 182.0, 692.0 ], "score": 1.0, "text": "Heidelberg, 2012.", "bbox": [ 65, 237, 127, 249 ] }, { "category_id": 15, "poly": [ 134.0, 685.0, 838.0, 685.0, 838.0, 717.0, 134.0, 717.0 ], "score": 0.98, "text": "[11] C. Tomasi and R. Manduchi, “Bilateral filtering for gray and color", "bbox": [ 48, 246, 301, 258 ] }, { "category_id": 15, "poly": [ 182.0, 710.0, 838.0, 710.0, 838.0, 742.0, 182.0, 742.0 ], "score": 0.98, "text": "images,” in Computer Vision, 1998. Sixth International Conference on,", "bbox": [ 65, 255, 301, 267 ] }, { "category_id": 15, "poly": [ 180.0, 736.0, 411.0, 731.0, 411.0, 763.0, 181.0, 768.0 ], "score": 0.93, "text": "pPp. 839 -846, jan 1998.", "bbox": [ 64, 264, 147, 274 ] }, { "category_id": 15, "poly": [ 132.0, 761.0, 838.0, 761.0, 838.0, 791.0, 132.0, 791.0 ], "score": 0.97, "text": "[12] K. He, J. Sun, and X. Tang, “Guided image filtering,”’ in Computer", "bbox": [ 47, 273, 301, 284 ] }, { "category_id": 15, "poly": [ 180.0, 784.0, 838.0, 786.0, 838.0, 818.0, 180.0, 816.0 ], "score": 0.98, "text": "Vision - ECCV 2010, vol. 6311 of Lecture Notes in Computer Science,", "bbox": [ 64, 282, 301, 294 ] }, { "category_id": 15, "poly": [ 180.0, 811.0, 607.0, 807.0, 608.0, 839.0, 180.0, 843.0 ], "score": 0.98, "text": "pp. 1-14, Springer Berlin / Heidelberg, 2010.", "bbox": [ 64, 291, 218, 302 ] }, { "category_id": 15, "poly": [ 129.0, 832.0, 839.0, 837.0, 838.0, 869.0, 129.0, 864.0 ], "score": 0.98, "text": "[13] L. Zhang, B. Curless, and S. M. Seitz, “Spacetime stereo: Shape", "bbox": [ 46, 299, 301, 312 ] }, { "category_id": 15, "poly": [ 182.0, 862.0, 836.0, 862.0, 836.0, 891.0, 182.0, 891.0 ], "score": 0.98, "text": "recovery for dynamic scenes,” in IEEE Computer Society Conference", "bbox": [ 65, 310, 300, 320 ] }, { "category_id": 15, "poly": [ 182.0, 885.0, 834.0, 885.0, 834.0, 917.0, 182.0, 917.0 ], "score": 0.97, "text": "on Computer Vision and Pattern Recognition, pp. 367-374, June 2003.", "bbox": [ 65, 318, 300, 330 ] }, { "category_id": 15, "poly": [ 132.0, 910.0, 838.0, 910.0, 838.0, 940.0, 132.0, 940.0 ], "score": 0.98, "text": "[14] J. Davis, D. Nehab, R. Ramamoorthi, and S. Rusinkiewicz, “Spacetime", "bbox": [ 47, 327, 301, 338 ] }, { "category_id": 15, "poly": [ 182.0, 935.0, 838.0, 935.0, 838.0, 965.0, 182.0, 965.0 ], "score": 0.97, "text": "stereo: a unifying framework for depth from triangulation,”’ Pattern", "bbox": [ 65, 336, 301, 347 ] }, { "category_id": 15, "poly": [ 182.0, 960.0, 838.0, 960.0, 838.0, 990.0, 182.0, 990.0 ], "score": 0.98, "text": "Analysis and Machine Intelligence, IEEE Transactions on,vol. 27,", "bbox": [ 65, 345, 301, 356 ] }, { "category_id": 15, "poly": [ 180.0, 983.0, 462.0, 983.0, 462.0, 1015.0, 180.0, 1015.0 ], "score": 0.97, "text": "Pp. 296 -302, February 2005.", "bbox": [ 64, 353, 166, 365 ] }, { "category_id": 15, "poly": [ 132.0, 1011.0, 838.0, 1011.0, 838.0, 1040.0, 132.0, 1040.0 ], "score": 0.99, "text": "[15] E. Larsen, P. Mordohai, M. Pollefeys, and H. Fuchs, “Temporally", "bbox": [ 47, 363, 301, 374 ] }, { "category_id": 15, "poly": [ 182.0, 1036.0, 836.0, 1036.0, 836.0, 1066.0, 182.0, 1066.0 ], "score": 0.99, "text": "consistent reconstruction from multiple video streams using enhanced", "bbox": [ 65, 372, 300, 383 ] }, { "category_id": 15, "poly": [ 178.0, 1054.0, 843.0, 1056.0, 843.0, 1095.0, 178.0, 1093.0 ], "score": 0.95, "text": "belief propagation in Computer Vision, 2007.ICCV 2007. IEEE1lth", "bbox": [ 64, 379, 303, 394 ] }, { "category_id": 15, "poly": [ 180.0, 1082.0, 644.0, 1082.0, 644.0, 1121.0, 180.0, 1121.0 ], "score": 0.97, "text": "International Conference on, pp. 1 -8, oct. 2007.", "bbox": [ 64, 389, 231, 403 ] }, { "category_id": 15, "poly": [ 134.0, 1109.0, 838.0, 1109.0, 838.0, 1141.0, 134.0, 1141.0 ], "score": 0.97, "text": "[16] M. Bleyer, M. Gelautz, C. Rother, and C. Rhemann, “\"A stereo approach", "bbox": [ 48, 399, 301, 410 ] }, { "category_id": 15, "poly": [ 180.0, 1134.0, 838.0, 1134.0, 838.0, 1166.0, 180.0, 1166.0 ], "score": 0.99, "text": "that handles the mating problem via image warping\" in Computer", "bbox": [ 64, 408, 301, 419 ] }, { "category_id": 15, "poly": [ 182.0, 1157.0, 838.0, 1157.0, 838.0, 1189.0, 182.0, 1189.0 ], "score": 0.98, "text": "Vision and Pattern Recognition, 2009. CVPR 2009. IEEE Conference", "bbox": [ 65, 416, 301, 428 ] }, { "category_id": 15, "poly": [ 180.0, 1183.0, 459.0, 1175.0, 460.0, 1212.0, 181.0, 1219.0 ], "score": 0.98, "text": "on, pp. 501 -508, June 2009.", "bbox": [ 64, 425, 165, 436 ] }, { "category_id": 15, "poly": [ 129.0, 1205.0, 838.0, 1208.0, 838.0, 1240.0, 129.0, 1237.0 ], "score": 0.98, "text": " [17] M. Sizintsev and R. Wildes, “Spatiotemporal stereo via spatiotemporal", "bbox": [ 46, 433, 301, 446 ] }, { "category_id": 15, "poly": [ 182.0, 1235.0, 838.0, 1235.0, 838.0, 1265.0, 182.0, 1265.0 ], "score": 0.97, "text": "quadric element (stequel) matching,” in Computer Vision and Pattern", "bbox": [ 65, 444, 301, 455 ] }, { "category_id": 15, "poly": [ 185.0, 1258.0, 841.0, 1258.0, 841.0, 1290.0, 185.0, 1290.0 ], "score": 0.98, "text": "Recognition, 2009. CVPR 2009. IEEE Conference on, Pp. 493 -500,", "bbox": [ 66, 452, 302, 464 ] }, { "category_id": 15, "poly": [ 185.0, 1286.0, 286.0, 1286.0, 286.0, 1311.0, 185.0, 1311.0 ], "score": 0.99, "text": "june 2009.", "bbox": [ 66, 462, 102, 471 ] }, { "category_id": 15, "poly": [ 132.0, 1309.0, 838.0, 1309.0, 838.0, 1338.0, 132.0, 1338.0 ], "score": 0.97, "text": "[18] M. Sizintsev and R. Wildes, “Spatiotemporal stereo and scene flow via", "bbox": [ 47, 471, 301, 481 ] }, { "category_id": 15, "poly": [ 182.0, 1334.0, 841.0, 1334.0, 841.0, 1364.0, 182.0, 1364.0 ], "score": 0.97, "text": "stequel matching,”’Pattern Analysis and Machine Intelligence, IEEE", "bbox": [ 65, 480, 302, 491 ] }, { "category_id": 15, "poly": [ 182.0, 1359.0, 684.0, 1359.0, 684.0, 1391.0, 182.0, 1391.0 ], "score": 1.0, "text": "Transactions on, vol. 34, pp. 1206 -1219, june 2012.", "bbox": [ 65, 489, 246, 500 ] }, { "category_id": 15, "poly": [ 132.0, 1382.0, 834.0, 1382.0, 834.0, 1412.0, 132.0, 1412.0 ], "score": 0.98, "text": "[19] C. Richardt, D. Orr, I. Davies, A. Criminisi, and N. A. Dodgson,", "bbox": [ 47, 497, 300, 508 ] }, { "category_id": 15, "poly": [ 185.0, 1409.0, 838.0, 1409.0, 838.0, 1441.0, 185.0, 1441.0 ], "score": 0.98, "text": "\"Real-time spatiotemporal stereo matching using the dual-cross-bilateral", "bbox": [ 66, 507, 301, 518 ] }, { "category_id": 15, "poly": [ 182.0, 1432.0, 838.0, 1432.0, 838.0, 1464.0, 182.0, 1464.0 ], "score": 0.95, "text": "grid,\" in Proceedings of the European Conference on Computer Vision", "bbox": [ 65, 515, 301, 527 ] }, { "category_id": 15, "poly": [ 182.0, 1458.0, 838.0, 1458.0, 838.0, 1490.0, 182.0, 1490.0 ], "score": 0.98, "text": "(ECCV), Lecture Notes in Computer Science, pp. 510-523, September", "bbox": [ 65, 524, 301, 536 ] }, { "category_id": 15, "poly": [ 182.0, 1477.0, 243.0, 1483.0, 241.0, 1511.0, 179.0, 1505.0 ], "score": 1.0, "text": "2010.", "bbox": [ 65, 531, 86, 543 ] }, { "category_id": 15, "poly": [ 134.0, 1508.0, 836.0, 1508.0, 836.0, 1538.0, 134.0, 1538.0 ], "score": 0.98, "text": "[20] S. Paris and F. Durand, “A fast approximation of the bilateral filter using", "bbox": [ 48, 542, 300, 553 ] }, { "category_id": 15, "poly": [ 182.0, 1533.0, 836.0, 1533.0, 836.0, 1565.0, 182.0, 1565.0 ], "score": 0.98, "text": "a signal processing approach,” Int. J. Comput. Vision, vol. 81, pp. 24-52,", "bbox": [ 65, 551, 300, 563 ] }, { "category_id": 15, "poly": [ 185.0, 1561.0, 282.0, 1561.0, 282.0, 1586.0, 185.0, 1586.0 ], "score": 0.98, "text": "Jan. 2009.", "bbox": [ 66, 561, 101, 570 ] }, { "category_id": 15, "poly": [ 134.0, 1584.0, 836.0, 1584.0, 836.0, 1613.0, 134.0, 1613.0 ], "score": 0.98, "text": "[21] Q. Yang, L. Wang, R. Yang, S. Wang, M. Liao, and D. Nistér, “Real-", "bbox": [ 48, 570, 300, 580 ] }, { "category_id": 15, "poly": [ 182.0, 1609.0, 838.0, 1609.0, 838.0, 1641.0, 182.0, 1641.0 ], "score": 0.98, "text": "time global stereo matching using hierarchical belief propagation.” in", "bbox": [ 65, 579, 301, 590 ] }, { "category_id": 15, "poly": [ 182.0, 1634.0, 698.0, 1634.0, 698.0, 1666.0, 182.0, 1666.0 ], "score": 1.0, "text": "British Machine Vision Conference, pp. 989-998, 2006.", "bbox": [ 65, 588, 251, 599 ] } ], "page_info": { "page_no": 5, "height": 2200, "width": 1700 } } ]