idk 6.79 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3

import argparse

from index_kits.dataset.make_dataset_core import startup, make_multireso
from index_kits.common import show_index_info
from index_kits import __version__


def common_args(parser):
    """Attach the options shared by the index-building sub-commands.

    Currently this is only the required ``-t/--target`` option.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    parser.add_argument(
        '-t', '--target',
        required=True,
        type=str,
        help='Save path',
    )


def _add_show_parser(sub_parsers):
    """Register the `show` sub-command and return its parser."""
    show_parser = sub_parsers.add_parser('show', description="""
    Show base/multireso index v2 information.
    
    Example
    -------
    index_kits show /path/to/index.json
    """, formatter_class=argparse.RawTextHelpFormatter)
    show_parser.add_argument('src', type=str, help='Path to a base/multireso index file.')
    show_parser.add_argument('--arrow-files', action='store_true', help='Show arrow files only.')
    show_parser.add_argument('--depth', type=int, default=1,
                             help='Arrow file depth. Default is 1, the level of last folder in the arrow file path. '
                                  'Set it to 0 to show the full path including `xxx/last_folder/*.arrow`.')
    return show_parser


def _add_base_parser(sub_parsers):
    """Register the `base` (single resolution bucket) sub-command and return its parser."""
    base_parser = sub_parsers.add_parser('base', description="""
    Build base index v2.
    
    Example
    -------
    index_kits base -c /path/to/config.yaml -t /path/to/index.json
    """, formatter_class=argparse.RawTextHelpFormatter)
    base_parser.add_argument('-c', '--config', type=str, required=True, help='Configuration file path')
    common_args(base_parser)
    base_parser.add_argument('-w', '--world-size', type=int, default=1)
    base_parser.add_argument('--work-dir', type=str, default='.', help='Work directory')
    base_parser.add_argument('--use-cache', action='store_true', help='Use cache to avoid reprocessing. '
                                                                      'Perform merge pkl results directly.')
    return base_parser


def _add_multireso_parser(sub_parsers):
    """Register the `multireso` (multi-resolution bucket) sub-command and return its parser."""
    mo_parser = sub_parsers.add_parser('multireso', description="""
    Build multi-resolution index v2
    
    Example
    -------
    Build with a configuration file:
    index_kits multireso -c /path/to/config.yaml -t /path/to/index_mb_gt512.json
    
    Build by specifying arguments without a configuration file:
    index_kits multireso --src /path/to/index.json --base-size 512 --reso-step 32 --min-size 512 -t /path/to/index_mb_gt512.json
    
    Build by specifying target-ratios:
    index_kits multireso --src /path/to/index.json --base-size 512 --target-ratios 1:1 4:3 3:4 16:9 9:16 --min-size 512 -t /path/to/index_mb_gt512.json
    
    Build with multiple source index files.
    index_kits multireso --src /path/to/index1.json /path/to/index2.json --base-size 512 --reso-step 32 --min-size 512 -t /path/to/index_mb_gt512.json
    """, formatter_class=argparse.RawTextHelpFormatter)
    mo_parser.add_argument('-c', '--config', type=str, default=None,
                           help='Configuration file path in a yaml format. Either --config or --src must be provided.')
    mo_parser.add_argument('-s', '--src', type=str, nargs='+', default=None,
                           help='Source index files. Either --config or --src must be provided.')
    common_args(mo_parser)
    mo_parser.add_argument('--base-size', type=int, default=None, help="Base size. Typically set as 256/512/1024 according to image size you train model.")
    mo_parser.add_argument('--reso-step', type=int, default=None,
                           help="Resolution step. Either reso_step or target_ratios must be provided.")
    mo_parser.add_argument('--target-ratios', type=str, nargs='+', default=None,
                           help="Target ratios. Either reso_step or target_ratios must be provided.")
    mo_parser.add_argument('--md5-file', type=str, default=None,
                           help='You can provide an md5 to height and width file to accelerate the process. '
                                'It is a pickle file that contains a dict, which maps md5 to (height, width) tuple.')
    mo_parser.add_argument('--align', type=int, default=16, help="Used when --target-ratios is provided. Align size of source image height and width.")
    mo_parser.add_argument('--min-size', type=int, default=0,
                           help="Minimum size. Images smaller than this size will be ignored.")
    return mo_parser


def get_args(argv=None):
    """Build the IndexKits command-line parser and parse the arguments.

    The parser exposes three sub-commands, stored in ``args.task``:
    ``show``, ``base`` and ``multireso``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which
            keeps the original zero-argument call working unchanged.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        SystemExit: Raised by argparse on invalid input, on ``-h``/``-v``,
            or when ``multireso`` is given neither ``--config`` nor ``--src``.
    """
    parser = argparse.ArgumentParser(description="""
    IndexKits is a tool to build and manage index files for large-scale datasets. 
    It supports both base index and multi-resolution index.
    
    Introduction
    ------------
    This command line tool provides the following functionalities:
    1. Show index v2 information
    2. Build base index v2
    3. Build multi-resolution index v2
    
    Examples
    --------
    1. Show index v2 information
        index_kits show /path/to/index.json
    
    2. Build base index v2
        Default usage:
        index_kits base -c /path/to/config.yaml -t /path/to/index.json
        
        Use multiple processes:
        index_kits base -c /path/to/config.yaml -t /path/to/index.json -w 40
    
    3. Build multi-resolution index v2
    
        Build with a configuration file:
        index_kits multireso -c /path/to/config.yaml -t /path/to/index_mb_gt512.json
        
        Build by specifying arguments without a configuration file:
        index_kits multireso --src /path/to/index.json --base-size 512 --reso-step 32 --min-size 512 -t /path/to/index_mb_gt512.json 
        
        Build by specifying target-ratios:
        index_kits multireso --src /path/to/index.json --base-size 512 --target-ratios 1:1 4:3 3:4 16:9 9:16 --min-size 512 -t /path/to/index_mb_gt512.json
        
        Build with multiple source index files.
        index_kits multireso --src /path/to/index1.json /path/to/index2.json --base-size 512 --reso-step 32 --min-size 512 -t /path/to/index_mb_gt512.json
    """, formatter_class=argparse.RawTextHelpFormatter)
    sub_parsers = parser.add_subparsers(dest='task', required=True)

    # Sub-commands are registered in the same order as before so the
    # generated help output is unchanged.
    _add_show_parser(sub_parsers)
    _add_base_parser(sub_parsers)
    mo_parser = _add_multireso_parser(sub_parsers)

    # Common
    parser.add_argument('-v', '--version', action='version', version=f'%(prog)s {__version__}')

    args = parser.parse_args(argv)

    # The help text promises "Either --config or --src must be provided";
    # enforce it here so the user gets a usage message instead of a
    # failure later in the build. The sub-parser's error() is used so the
    # printed usage line is the one for `multireso`.
    if args.task == 'multireso' and args.config is None and args.src is None:
        mo_parser.error('either -c/--config or -s/--src must be provided')

    return args


if __name__ == '__main__':
    # Parse the command line once, then dispatch on the chosen sub-command.
    cli = get_args()
    task = cli.task

    if task == 'multireso':
        # Multi-resolution index build.
        make_multireso(
            cli.target, cli.config, cli.src,
            cli.base_size, cli.reso_step, cli.target_ratios,
            cli.align, cli.min_size, cli.md5_file,
        )
    elif task == 'base':
        # Base index build.
        startup(cli.config, cli.target, cli.world_size, cli.work_dir, use_cache=cli.use_cache)
    elif task == 'show':
        # Print information about an existing index file.
        show_index_info(cli.src, cli.arrow_files, cli.depth)