dataset.jsonl 12 KB
Newer Older
wanglch's avatar
wanglch committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_00", "type": "present", "text": "Corporate social responsibility and the tobacco industry: hope or hype?"}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_01", "type": "present", "text": "this leaves BAT to argue why it should not be held to be largely accountable for the annual deaths of some 754 600 smokers, and Philip Morris some 803 600 smokers."}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_02", "type": "present", "text": "The term \"corporate social responsibility\" is in vogue at the moment but as a concept it is vague and means different things to different people.", "max_diffs": 2}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_03", "type": "present", "text": "Over the past three decades increasing pressure from non-governmental"}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_04", "type": "absent", "text": "Downloaded from http://tobaccocontrol.bmj.com/"}

{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_10", "type": "order", "before": "Corporate social responsibility and the tobacco industry: hope or hype?", "after": "The unprecedented expansion of power and influence of TNCs over the past three decades has accelerated global trade and development, but also environmental damage and abuses of", "max_diffs": 2}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_11", "type": "order", "before": "It now looks like that with vigilance", "after": "this leaves BAT to argue why it should not be held to be largely accountable for the annual deaths",  "max_diffs": 2}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_12", "type": "order", "before": "Corporate social responsibility (CSR) emerged from a realisation among transnational corporations", "after": " perspective on its own behaviour; and reflects on whether marketing tobacco is antithetical to social responsibility.", "max_diffs": 2}

{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "discoverworld_crazy_table4_00", "type": "present", "text": "Table 4: Baseline model performance on each of the three scoring metrics"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "discoverworld_crazy_table4_01", "type": "present", "text": "Table 5: Baseline model performance on each of the three scoring metrics"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "discoverworld_crazy_table4_02", "type": "present", "text": "We use the GPT-4O model for all our agents due to its higher performance and lower cost compared to other models. For space we provide"}

{"pdf": "mattsnotes.pdf", "page": 1, "id": "mattsnotes_minediff_00", "type": "present", "checked": "verified", "text": "The-Stack-V2"}
{"pdf": "mattsnotes.pdf", "page": 1, "id": "mattsnotes_minediff_01", "type": "present", "checked": "verified", "text": "SE, whatever we've scraped"}
{"pdf": "mattsnotes.pdf", "page": 1, "id": "mattsnotes_minediff_02", "type": "present", "checked": "verified", "text": "HQ DCLM"}
{"pdf": "mattsnotes.pdf", "page": 2, "id": "mattsnotes_minediff_03", "type": "present", "checked": "verified", "text": "Order by repo"}
{"pdf": "mattsnotes.pdf", "page": 3, "id": "mattsnotes_minediff_04", "type": "present", "checked": "verified", "text": "ARCH + TRAINING"}

{"pdf": "buildingnotes.pdf", "page": 1, "id": "building_notes_00", "type": "present", "checked": "verified", "text": "Master Bath", "case_sensitive": false}
{"pdf": "buildingnotes.pdf", "page": 1, "id": "building_notes_01", "type": "present", "checked": "verified", "text": "Laundry", "case_sensitive": false}
{"pdf": "buildingnotes.pdf", "page": 1, "id": "building_notes_02", "type": "present", "checked": "verified", "text": "Guest Bath", "case_sensitive": false}

{"pdf": "lincoln_letter.pdf", "page": 1, "id": "lincoln_letter_minediff_00", "type": "present", "checked": "verified", "text": "January 10th 1864."}
{"pdf": "lincoln_letter.pdf", "page": 1, "id": "lincoln_letter_minediff_01", "type": "present", "checked": "verified", "text": "Major General Hitchcock, Commissioner of Exchanges, is authorized and directed to offer Brigadier General Trimble, now a prisoner of war in Fort McHenry, in exchange for Major White, who is held as a prisoner at Richmond."}
{"pdf": "lincoln_letter.pdf", "page": 1, "id": "lincoln_letter_minediff_03", "type": "present", "checked": "verified", "text": "He is also directed to send forward the offer of exchange by Henry M. Warfield, Esq. of Baltimore, under a flag of truce, and give him a pass to City Point."}

{"pdf": "openstax_caculus_pg_273.pdf", "page": 1, "id": "openstax_caculus_pg_273_minediff_02", "type": "present", "checked": "verified", "text": "Use the graph of the position function to determine the time intervals when the velocity is positive, negative, or zero."}
{"pdf": "openstax_caculus_pg_273.pdf", "page": 1, "id": "openstax_caculus_pg_273_minediff_03", "type": "present", "checked": "verified", "text": "Use the graph of the velocity function to determine the time intervals when the acceleration is positive, negative, or zero."}
{"pdf": "openstax_caculus_pg_273.pdf", "page": 1, "id": "openstax_caculus_pg_273_minediff_04", "type": "order", "before": "150.", "after": "157."}
{"pdf": "openstax_caculus_pg_273.pdf", "page": 1, "id": "openstax_caculus_pg_273_minediff_05", "type": "order", "before": "150.", "after": "158."}
{"pdf": "openstax_caculus_pg_273.pdf", "page": 1, "id": "openstax_caculus_pg_273_minediff_06", "type": "order", "before": "150.", "after": "159."}

{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_minediff_01", "type": "present", "checked": "verified", "text": "This report first provides the context and development of CSR; then, from internal company documents, examines how PM came to its own version."}
{"pdf": "multi_column_miss.pdf", "page": 1, "id": "multi_column_miss_minediff_02", "type": "present", "checked": "verified", "text": "This paper examines whether a tobacco company espousing CSR should be judged simply as a corporate entity along standards of business ethics, or as an irretrievably negative force in the realm of public health, thereby rendering CSR an oxymoron."}

{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_minediff_00", "type": "present", "checked": "verified", "text": "Table 1 Composition of the pretraining data for OLMo 2."}

{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table00", "type": "table", "cell": "Type"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table01", "type": "table", "cell": "3.32T", "left": "3.71T"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table02", "type": "table", "cell": "3.32T", "right": "21.32T"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table03", "type": "table", "cell": "11.8B", "up": "12.2B"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table04", "type": "table", "cell": "11.8B", "down": "3.7B"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table05", "type": "table", "cell": "3.32T", "top_heading": "Words"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table06", "type": "table", "cell": "arXiv", "top_heading": "Source"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table07", "type": "table", "cell": "47.2B", "top_heading": "Bytes"}
{"pdf": "olmo2-pg4.pdf", "page": 1, "id": "olmo2-pg4_table08", "type": "table", "cell": "Math proofs code", "left_heading": "Algebraic Stack"}

{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t00", "type": "table", "cell": "Quadratic regression", "left": "Challenge"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t01", "type": "table", "cell": "Instrument Use", "left": "Normal"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t02", "type": "table", "cell": "0.87", "top_heading": "Procedure"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t03", "type": "table", "cell": "0.87", "top_heading": "ReACT"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t04", "type": "table", "cell": "Pick-and-place object", "left_heading": "27"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t05", "type": "table", "cell": "0.66", "right": "0.44"}
{"pdf": "discoverworld_crazy_table4.pdf", "page": 1, "id": "olmo2-discoverworld_crazy_table4_t06", "type": "table", "cell": "Interact with a moving agent", "top_heading": "Unit Test Topic"}

{"pdf": "earnings.pdf", "page": 1, "id": "earnings_table00", "type": "table", "cell": "1,136", "top_heading": "Year Ended"}
{"pdf": "earnings.pdf", "page": 1, "id": "earnings_table01", "type": "table", "cell": "Year Ended"}
{"pdf": "earnings.pdf", "page": 1, "id": "earnings_table02", "type": "table", "cell": "680", "up": "1,892"}
{"pdf": "earnings.pdf", "page": 1, "id": "earnings_table03", "type": "table", "cell": "2,532", "left_heading": "Research and development"}
{"pdf": "earnings.pdf", "page": 1, "id": "earnings_table04", "type": "absent", "text": "62"}


{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_00", "type": "order", "before": "Euler's Identity", "after": "Pythagorean Theorem"}
{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_01", "type": "order", "before": "Pythagorean Theorem", "after": "The Fundamental Theorem of Calculus"}
{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_02", "type": "order", "before": "The Fundamental Theorem of Calculus", "after": "Maxwell's Equations"}
{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_03", "type": "math", "math": "e^{i \\pi}+1=0"}
{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_04", "type": "math", "math": "\\int_{a}^{b} f(x) d x=F(b)-F(a)"}
{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_05", "type": "math", "math": "a^{2}+b^{2}=c^{2}"}
{"pdf": "mathfuncs.pdf", "page": 1, "id": "mathfuncs_06", "type": "math", "math": "\\nabla \\times \\mathbf{E}=-\\frac{\\partial \\mathbf{B}}{\\partial t}"}

{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_00", "type": "order", "before": "Euler's Identity", "after": "Pythagorean Theorem"}
{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_01", "type": "order", "before": "Pythagorean Theorem", "after": "The Fundamental Theorem of Calculus"}
{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_02", "type": "order", "before": "The Fundamental Theorem of Calculus", "after": "Maxwell's Equations"}
{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_03", "type": "math", "math": "e^{i \\pi}+1=0"}
{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_04", "type": "math", "math": "\\int_{a}^{b} f(x) d x=F(b)-F(a)"}
{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_05", "type": "math", "math": "a^{2}+b^{2}=c^{2}"}
{"pdf": "mathfuncs_colswitch.pdf", "page": 1, "id": "mathfuncscol_06", "type": "math", "math": "\\nabla \\times \\mathbf{E}=-\\frac{\\partial \\mathbf{B}}{\\partial t}"}

{"pdf": "math_2503_04086.pdf", "page": 1, "id": "math_2503_04086_00", "type": "math", "math": "\\lambda_{g}=\\sum_{s \\in S} \\zeta_{n}^{\\psi(g s)}=\\sum_{i=1}^{k}\\left[\\sum_{s, R s=\\mathcal{I}_{i}} \\zeta_{n}^{\\psi(g s)}\\right]"}
{"pdf": "math_2503_04086.pdf", "page": 1, "id": "math_2503_04086_01", "type": "math", "math": "\\lambda_{g}=\\lambda_{g^{\\prime}}"}
{"pdf": "math_2503_04086.pdf", "page": 1, "id": "math_2503_04086_02", "type": "math", "math": "u \\in\\left(R / \\operatorname{Ann}_{R}\\left(x_{i}\\right)\\right)^{\\times}"}
{"pdf": "math_2503_04086.pdf", "page": 1, "id": "math_2503_04086_03", "type": "math", "math": "\\lambda_{g}=\\sum_{i=1}^{k} c\\left(g, R / \\operatorname{Ann}_{R}\\left(x_{i}\\right)\\right)"}
{"pdf": "math_2503_04086.pdf", "page": 1, "id": "math_2503_04086_04", "type": "present", "text": "We also thank Ján Mináč for his constant encouragement and support."}
{"pdf": "math_2503_04086.pdf", "page": 1, "id": "math_2503_04086_05", "type": "present", "text": "Allgemeine theorie der Gaußschen Summen in endlichen kommutativen Ringe"}