Commit e999df11 authored by suily's avatar suily
Browse files

添加小数据集

parent fc8c7eb8
[
{
"id": "004539375",
"image": "00453/004539375.jpg",
"conversations": [
{
"from": "human",
"value": "Render a clear and concise summary of the photo.\n<image>"
},
{
"from": "gpt",
"value": "select luxury furniture 3 - inch gel memory foam mattress topper"
}
]
},
{
"id": "002239345",
"image": "00223/002239345.jpg",
"conversations": [
{
"from": "human",
"value": "Write a terse but informative summary of the picture.\n<image>"
},
{
"from": "gpt",
"value": "a grey watch with an army style strap"
}
]
},
{
"id": "005947502",
"image": "00594/005947502.jpg",
"conversations": [
{
"from": "human",
"value": "<image>\nWhat is this?"
},
{
"from": "gpt",
"value": "a dragon kite flying in the blue sky stock images"
}
]
},
{
"id": "005116462",
"image": "00511/005116462.jpg",
"conversations": [
{
"from": "human",
"value": "<image>\nRender a clear and concise summary of the photo."
},
{
"from": "gpt",
"value": "$ 10 - cute cheap printed mini dress - khaki multicolor striped floral print peasant short sleeve tunic"
}
]
},
{
"id": "002017886",
"image": "00201/002017886.jpg",
"conversations": [
{
"from": "human",
"value": "What is in the photo?\n<image>"
},
{
"from": "gpt",
"value": "augmented reality using aruco markers in opencv"
}
]
},
{
"id": "001819539",
"image": "00181/001819539.jpg",
"conversations": [
{
"from": "human",
"value": "Describe the image concisely.\n<image>"
},
{
"from": "gpt",
"value": "a pair of childrens boots in the shape of knitted shoes"
}
]
},
{
"id": "002838027",
"image": "00283/002838027.jpg",
"conversations": [
{
"from": "human",
"value": "Share a concise interpretation of the image provided.\n<image>"
},
{
"from": "gpt",
"value": "floral watercolor clip art rose flower border with pink"
}
]
},
{
"id": "000406392",
"image": "00040/000406392.jpg",
"conversations": [
{
"from": "human",
"value": "Give a brief description of the image.\n<image>"
},
{
"from": "gpt",
"value": "the divine queen in her elaborate masks canvas print featuring the face and hands of a woman with red hair"
}
]
},
{
"id": "006288475",
"image": "00628/006288475.jpg",
"conversations": [
{
"from": "human",
"value": "<image>\nWhat is this?"
},
{
"from": "gpt",
"value": "an aerial view of paris at night from a plane stock photo"
}
]
},
{
"id": "000075539",
"image": "00007/000075539.jpg",
"conversations": [
{
"from": "human",
"value": "Give a brief description of the image.\n<image>"
},
{
"from": "gpt",
"value": "the nike air trainer low in desert tan and black"
}
]
}
]
\ No newline at end of file
[
{
"id": "Mq57LwS2UtI",
"conversations": [
{
"from": "human",
"value": "<video>\nCould you provide a summary of the incidents that occurred at various timestamps in the video?"
},
{
"from": "gpt",
"value": "A crowd of people watching an asian news broadcast from <s0> to <e0>. An image of the news, showing people at a church from <s1> to <e1>."
}
],
"meta": {
"split": [
77.3,
85.0
],
"duration": 7.7,
"token": {
"<s0>": 0,
"<e0>": 3.5,
"<s1>": 3.5,
"<e1>": 7.7
}
},
"source": "internvid"
},
{
"id": "3n3oCNerzV0",
"conversations": [
{
"from": "human",
"value": "<video>\nI'd appreciate it if you could provide a detailed account of the events that occurred at different timestamps in the video."
},
{
"from": "gpt",
"value": "From <s0> to <e0>, woman is counting money with a pen on a white table. From <s1> to <e1>, two people shaking hands in front of a desk."
}
],
"meta": {
"split": [
129.8,
184.0
],
"duration": 54.2,
"token": {
"<s0>": 0,
"<e0>": 9.4,
"<s1>": 49.4,
"<e1>": 54.2
}
},
"source": "internvid"
},
{
"id": "4vWt9kL3PJw",
"conversations": [
{
"from": "human",
"value": "<video>\nDuring which frames does a woman cutting pieces of paper that are colored occur in the video?"
},
{
"from": "gpt",
"value": "From <s1> to <e1>."
},
{
"from": "human",
"value": "At which time interval in the video can we see a person uses scissors to cut off colorful straws occurring?"
},
{
"from": "gpt",
"value": "From <s0> to <e0>."
}
],
"meta": {
"split": [
7.4,
74.7
],
"duration": 67.3,
"token": {
"<s0>": 37.6,
"<e0>": 44.6,
"<s1>": 48.6,
"<e1>": 58.4
}
},
"source": "internvid"
},
{
"id": "OfOYCsU-46s",
"conversations": [
{
"from": "human",
"value": "<video>\nCould you provide a summary of the incidents that occurred at various timestamps in the video?"
},
{
"from": "gpt",
"value": "From <s0> to <e0>, chinese characters and white background on screen in a meeting. From <s1> to <e1>, a man in a business suit wearing a face mask is holding a laptop. From <s2> to <e2>, woman wearing face mask in restaurant. From <s3> to <e3>, a man and woman wearing a face mask in a chinese newscast."
}
],
"meta": {
"split": [
61.4,
114.5
],
"duration": 53.1,
"token": {
"<s0>": 0,
"<e0>": 3.4,
"<s1>": 10.0,
"<e1>": 17.9,
"<s2>": 40.6,
"<e2>": 44.3,
"<s3>": 49.3,
"<e3>": 53.1
}
},
"source": "internvid"
},
{
"id": "JYUqV-7BPWE",
"conversations": [
{
"from": "human",
"value": "<video>\nDescribe what took place from <s1> to <e1> in the video."
},
{
"from": "gpt",
"value": "A person holding a pair of keys and closing a window."
},
{
"from": "human",
"value": "Between which two frames can we witness the man is opening a window occurring in the video?"
},
{
"from": "gpt",
"value": "From <s0> to <e0>."
}
],
"meta": {
"split": [
3.7,
27.2
],
"duration": 23.5,
"token": {
"<s0>": 4.0,
"<e0>": 9.6,
"<s1>": 9.6,
"<e1>": 15.0
}
},
"source": "internvid"
},
{
"id": "fjadWGRxWq4",
"conversations": [
{
"from": "human",
"value": "<video>\nAt what point in the video can we observe the xinhua video shows how a local chinese man enjoys the chinese new year taking place?"
},
{
"from": "gpt",
"value": "From <s0> to <e0>."
},
{
"from": "human",
"value": "At which time interval in the video can we see an asian family gathered around a table eating some food occurring?"
},
{
"from": "gpt",
"value": "From <s2> to <e2>."
},
{
"from": "human",
"value": "Between which frames is chinese elderly people sit around the table, eating the food visible in the video?"
},
{
"from": "gpt",
"value": "From <s1> to <e1>."
}
],
"meta": {
"split": [
132.5,
196.3
],
"duration": 63.8,
"token": {
"<s0>": 0,
"<e0>": 6.6,
"<s1>": 47.7,
"<e1>": 53.2,
"<s2>": 53.2,
"<e2>": 63.8
}
},
"source": "internvid"
},
{
"id": "xmWID6WCDfc",
"conversations": [
{
"from": "human",
"value": "<video>\nCould you provide a summary of the incidents that occurred at various timestamps in the video?"
},
{
"from": "gpt",
"value": "From <s0> to <e0>, the google search page with a cartoon soccer player. From <s1> to <e1>, a chinese page showing an anime image. From <s2> to <e2>, a man playing soccer with a ball and some chinese words."
}
],
"meta": {
"split": [
33.7,
152.2
],
"duration": 118.5,
"token": {
"<s0>": 0,
"<e0>": 23.6,
"<s1>": 69.9,
"<e1>": 108.8,
"<s2>": 108.8,
"<e2>": 118.5
}
},
"source": "internvid"
},
{
"id": "WJL-njJOb2w",
"conversations": [
{
"from": "human",
"value": "<video>\nBetween which two frames can we witness bitcoins in stock exchange occurring in the video?"
},
{
"from": "gpt",
"value": "From <s1> to <e1>."
},
{
"from": "human",
"value": "Share what transpired from <s0> to <e0> in the video."
},
{
"from": "gpt",
"value": "How to buy bitcoins with us dollars."
}
],
"meta": {
"split": [
39.1,
132.0
],
"duration": 92.9,
"token": {
"<s0>": 0,
"<e0>": 28.3,
"<s1>": 57.6,
"<e1>": 92.9
}
},
"source": "internvid"
},
{
"id": "oV2kjFLrXrw",
"conversations": [
{
"from": "human",
"value": "<video>\nDuring which frames can we see a man in a jacket sits on the edge of a chair happening in the video?"
},
{
"from": "gpt",
"value": "From <s1> to <e1>."
},
{
"from": "human",
"value": "Provide details about the events from <s0> to <e0> in the video."
},
{
"from": "gpt",
"value": "The woman is wearing an old white dress in a cave."
}
],
"meta": {
"split": [
169.7,
221.9
],
"duration": 52.2,
"token": {
"<s0>": 32.3,
"<e0>": 38.8,
"<s1>": 46.3,
"<e1>": 51.6
}
},
"source": "internvid"
},
{
"id": "RnLq3gGn8_s",
"conversations": [
{
"from": "human",
"value": "<video>\nDuring which frames can we see a man speaking in korean happening in the video?"
},
{
"from": "gpt",
"value": "From <s2> to <e2>."
},
{
"from": "human",
"value": "At which time interval in the video can we see a man waving his hand with the words 'i am sanbae' occurring?"
},
{
"from": "gpt",
"value": "From <s0> to <e0>."
},
{
"from": "human",
"value": "During which frames does a man is speaking in front of a crowd with green and yellow balloons occur in the video?"
},
{
"from": "gpt",
"value": "From <s1> to <e1>."
}
],
"meta": {
"split": [
21.5,
48.4
],
"duration": 26.9,
"token": {
"<s0>": 0,
"<e0>": 5.8,
"<s1>": 8.3,
"<e1>": 13.5,
"<s2>": 13.5,
"<e2>": 17.8
}
},
"source": "internvid"
}
]
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment