"""Prepare PASCAL VOC datasets"""
import os
import shutil
import argparse
import tarfile

from encoding.utils import download, mkdir

_TARGET_DIR = os.path.expanduser('~/.encoding/data')


def parse_args():
    """Parse the command-line options of this script.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``download_dir`` (str or None),
        ``no_download`` (bool) and ``overwrite`` (bool).
    """
    parser = argparse.ArgumentParser(
        description='Initialize PASCAL VOC dataset.',
        epilog='Example: python prepare_pascal.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--download-dir', type=str, default=None,
                        help='dataset directory on disk')
    parser.add_argument('--no-download', action='store_true',
                        help='disable automatic download if set')
    parser.add_argument('--overwrite', action='store_true',
                        help='overwrite downloaded files if set, in case they are corrupted')
    args = parser.parse_args()
    return args


def _extract_archive(filename, path):
    """Extract the tar archive ``filename`` into the directory ``path``."""
    with tarfile.open(filename) as tar:
        if hasattr(tarfile, 'data_filter'):
            # Extraction filters only exist on newer Python releases. The
            # 'data' filter rejects members that would be written outside
            # ``path`` and avoids the missing-filter DeprecationWarning.
            tar.extractall(path=path, filter='data')
        else:
            tar.extractall(path=path)


def download_voc(path, overwrite=False):
    """Download the VOC2012 trainval archive and extract it into ``path``.

    Parameters
    ----------
    path : str
        Directory the archive is extracted into. The archive itself is
        stored in ``<path>/downloads``.
    overwrite : bool
        Forwarded to ``encoding.utils.download``.
    """
    _DOWNLOAD_URLS = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')]
    download_dir = os.path.join(path, 'downloads')
    mkdir(download_dir)
    for url, checksum in _DOWNLOAD_URLS:
        filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
        # extract
        _extract_archive(filename, path)


def download_aug(path, overwrite=False):
    """Download the augmented VOC annotations and set them up under ``path``.

    The archive is extracted into ``path``, the extracted
    ``benchmark_RELEASE`` directory is renamed to ``VOCaug`` and
    ``VOCaug/dataset/trainval.txt`` is generated by concatenating
    ``train.txt`` and ``val.txt`` from the same directory.

    Parameters
    ----------
    path : str
        Directory the archive is extracted into. The archive itself is
        stored in ``<path>/downloads``.
    overwrite : bool
        Forwarded to ``encoding.utils.download``.
    """
    _AUG_DOWNLOAD_URLS = [
        ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
         '7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
    download_dir = os.path.join(path, 'downloads')
    mkdir(download_dir)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
        # extract
        _extract_archive(filename, path)

    aug_dir = os.path.join(path, 'VOCaug')
    if os.path.isdir(aug_dir):
        # A VOCaug left over from a previous run would make shutil.move put
        # benchmark_RELEASE *inside* it instead of replacing it.
        shutil.rmtree(aug_dir)
    shutil.move(os.path.join(path, 'benchmark_RELEASE'), aug_dir)

    # generate trainval.txt
    dataset_dir = os.path.join(aug_dir, 'dataset')
    with open(os.path.join(dataset_dir, 'trainval.txt'), 'w') as outfile:
        for split_name in ('train.txt', 'val.txt'):
            with open(os.path.join(dataset_dir, split_name)) as infile:
                for line in infile:
                    # Terminate every entry so the last line of one split is
                    # never glued to the first line of the next one.
                    outfile.write(line if line.endswith('\n') else line + '\n')


def main():
    """Entry point: link an existing dataset directory or download the data.

    With ``--download-dir`` the script only creates a symlink from
    ``_TARGET_DIR`` to that directory. Otherwise both archives are downloaded
    into ``_TARGET_DIR`` unless ``--no-download`` is given.

    Raises
    ------
    FileExistsError
        If ``--download-dir`` is given and ``_TARGET_DIR`` already exists and
        is not a symlink.
    """
    args = parse_args()
    # NOTE(review): this creates ~/.encoding/datasets although _TARGET_DIR is
    # ~/.encoding/data; kept unchanged. Presumably it also ensures that
    # ~/.encoding exists before the symlink is created - confirm that
    # encoding.utils.mkdir creates parent directories.
    mkdir(os.path.expanduser('~/.encoding/datasets'))
    if args.download_dir is not None:
        # A relative symlink target is resolved against the directory that
        # contains the link, not the current working directory, so the
        # target has to be made absolute first.
        source_dir = os.path.abspath(os.path.expanduser(args.download_dir))
        if os.path.islink(_TARGET_DIR):
            # Also covers dangling links, which os.path.isdir does not see.
            os.remove(_TARGET_DIR)
        elif os.path.exists(_TARGET_DIR):
            # Never delete real data implicitly; let the user decide.
            raise FileExistsError(
                '{} already exists and is not a symlink; move or remove it '
                'before using --download-dir'.format(_TARGET_DIR))
        # make symlink
        os.symlink(source_dir, _TARGET_DIR)
    elif not args.no_download:
        download_voc(_TARGET_DIR, overwrite=args.overwrite)
        download_aug(_TARGET_DIR, overwrite=args.overwrite)


if __name__ == '__main__':
    main()