# convert_flickr30k_ann.py
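# Copyright (c) OpenMMLab. All rights reserved.
"""Create COCO-style ground-truth (GT) annotations from the raw Flickr30k
annotation file.

The script reads ``dataset_flickr30k.json`` from the current working
directory and writes ``flickr30k_train_gt.json``, ``flickr30k_val_gt.json``
and ``flickr30k_test_gt.json``. The GT annotations are used for evaluation
in the image captioning task.
"""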

import json


def main():
    """Convert raw Flickr30k annotations into per-split COCO-style GT files.

    Reads ``dataset_flickr30k.json`` from the current working directory and
    writes ``flickr30k_train_gt.json``, ``flickr30k_val_gt.json`` and
    ``flickr30k_test_gt.json`` into the same directory. Each output file has
    the form ``{'annotations': [...], 'images': [...]}`` and contains only
    the images and captions whose ``split`` field matches that file.

    Images whose ``split`` is not one of ``train``/``val``/``test`` are
    ignored. A file is written for every split, even if it ends up empty.

    Raises:
        FileNotFoundError: If ``dataset_flickr30k.json`` does not exist.
        KeyError: If an expected key is missing from the raw annotations.
    """
    with open('dataset_flickr30k.json', 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    splits = ['train', 'val', 'test']

    # One independent container per split. Sharing a single pair of lists
    # across splits would leak earlier splits into later output files.
    gt_by_split = {
        split: {
            'annotations': [],
            'images': []
        }
        for split in splits
    }

    # Single pass over the images; each one is routed to its own split.
    for img in annotations['images']:

        # img_example={
        #     "sentids": [0, 1, 2],
        #     "imgid": 0,
        #     "sentences": [
        #         {"raw": "Two men in green shirts standing in a yard.",
        #          "imgid": 0, "sentid": 0},
        #         {"raw": "A man in a blue shirt standing in a garden.",
        #          "imgid": 0, "sentid": 1},
        #         {"raw": "Two friends enjoy time spent together.",
        #          "imgid": 0, "sentid": 2}
        #     ],
        #     "split": "train",
        #     "filename": "1000092795.jpg"
        # },

        gt = gt_by_split.get(img['split'])
        if gt is None:
            # Not one of the splits we export.
            continue

        gt['images'].append({'id': img['imgid']})
        gt['annotations'].extend({
            'image_id': img['imgid'],
            'id': sentence['sentid'],
            'caption': sentence['raw']
        } for sentence in img['sentences'])

    # generate flickr30k_train_gt.json, flickr30k_val_gt.json
    # and flickr30k_test_gt.json
    for split in splits:
        with open(f'flickr30k_{split}_gt.json', 'w', encoding='utf-8') as f:
            json.dump(gt_by_split[split], f)


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()