Unverified Commit b91aba39 authored by Yuge Zhang's avatar Yuge Zhang Committed by GitHub
Browse files

NAS Benchmark (#2578)

* Adding NAS Benchmark (201)

* Add missing endline

* Update script

* Draft for NAS-Bench-101

* Update NAS-Bench-101

* Update constants

* Add API

* Update API

* Fix typo

* Draft for NDS

* Fix issues in storing loss

* Fix cell_spec problem

* Finalize NDS

* Update time consumption

* Add nds query function

* Update documentation for NAS-Bench-101

* Reformat generators

* Add NAS-Bench-201 docs

* Unite constant names

* Update docstring

* Update docstring

* Update rst

* Update scripts

* Add git as dependency

* Apt update

* Update installation scripts

* Fix dependency for pipeline

* Fix NDS script

* Fix NAS-Bench-201 installation

* Add example notebook

* Correct latency dimension

* shortcuts -> query

* Change run -> trial, ComputedStats -> TrialStats

* ipynb needs re-generation

* Fix NAS rst

* Fix documentation and pylint

* Fix pylint

* Add pandoc as dependency

* Update pandoc dependency

* Fix documentation broken link
parent 3fb49d9b
from .constants import NONE, SKIP_CONNECT, CONV_1X1, CONV_3X3, AVG_POOL_3X3
from .model import Nb201TrialStats, Nb201IntermediateStats, Nb201TrialConfig
from .query import query_nb201_trial_stats
# Canonical operator names for the NAS-Bench-201 cell search space.
NONE = 'none'
SKIP_CONNECT = 'skip_connect'
CONV_1X1 = 'conv_1x1'
CONV_3X3 = 'conv_3x3'
AVG_POOL_3X3 = 'avg_pool_3x3'

# All candidate operations that can be placed on an edge of the cell.
PRIMITIVES = [
    NONE,
    SKIP_CONNECT,
    CONV_1X1,
    CONV_3X3,
    AVG_POOL_3X3,
]
import argparse
import re
import tqdm
import torch
from .constants import NONE, SKIP_CONNECT, CONV_1X1, CONV_3X3, AVG_POOL_3X3
from .model import db, Nb201TrialConfig, Nb201TrialStats, Nb201IntermediateStats
def parse_arch_str(arch_str):
    """Convert an official NAS-Bench-201 architecture string into an edge-to-operator dict.

    Parameters
    ----------
    arch_str : str
        Architecture in the official textual format, e.g.,
        ``|none~0|+|skip_connect~0|none~1|+|nor_conv_1x1~0|none~1|avg_pool_3x3~2|``.

    Returns
    -------
    dict
        Maps each cell edge ``'<src>_<dst>'`` (nodes 0-3) to one of the canonical
        operator constants defined in ``constants``.

    Raises
    ------
    ValueError
        If ``arch_str`` does not match the expected 4-node cell format.
    KeyError
        If the string contains an operator name outside the known vocabulary.
    """
    # Official operator names -> canonical constants shared across benchmarks.
    mp = {
        'none': NONE,
        'skip_connect': SKIP_CONNECT,
        'nor_conv_1x1': CONV_1X1,
        'nor_conv_3x3': CONV_3X3,
        'avg_pool_3x3': AVG_POOL_3X3
    }
    m = re.match(r'\|(.*)~0\|\+\|(.*)~0\|(.*)~1\|\+\|(.*)~0\|(.*)~1\|(.*)~2\|', arch_str)
    if m is None:
        # Fail fast with a clear message instead of raising an opaque
        # AttributeError on m.group() below.
        raise ValueError('Invalid NAS-Bench-201 architecture string: %r' % arch_str)
    return {
        '0_1': mp[m.group(1)],
        '0_2': mp[m.group(2)],
        '1_2': mp[m.group(3)],
        '0_3': mp[m.group(4)],
        '1_3': mp[m.group(5)],
        '2_3': mp[m.group(6)]
    }
def main():
    """Convert the official NAS-Bench-201 ``.pth`` file into the local SQLite database.

    Reads the benchmark pickle given as the single positional argument and
    populates the ``Nb201TrialConfig``, ``Nb201TrialStats`` and
    ``Nb201IntermediateStats`` tables.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file',
                        help='Path to the file to be converted, e.g., NAS-Bench-201-v1_1-096897.pth.')
    args = parser.parse_args()
    # For each dataset, which raw-result split supplies the
    # [train, valid, test, ori-test] metrics respectively.
    dataset_split = {
        'cifar10-valid': ['train', 'x-valid', 'ori-test', 'ori-test'],
        'cifar10': ['train', 'ori-test', 'ori-test', 'ori-test'],
        'cifar100': ['train', 'x-valid', 'x-test', 'ori-test'],
        'imagenet16-120': ['train', 'x-valid', 'x-test', 'ori-test'],
    }
    with db:
        db.create_tables([Nb201TrialConfig, Nb201TrialStats, Nb201IntermediateStats])
        print('Loading NAS-Bench-201 pickle...')
        nb201_data = torch.load(args.input_file)
        print('Dumping architectures...')
        # One trial config per (architecture, epoch budget, dataset) combination.
        for arch_str in nb201_data['meta_archs']:
            arch_json = parse_arch_str(arch_str)
            for epochs in [12, 200]:
                for dataset in Nb201TrialConfig.dataset.choices:
                    Nb201TrialConfig.create(arch=arch_json, num_epochs=epochs, dataset=dataset,
                                            num_channels=16, num_cells=5)
        for arch_info in tqdm.tqdm(nb201_data['arch2infos'].values(),
                                   desc='Processing architecture statistics'):
            # 'less' is the benchmark's tag for the 12-epoch protocol; the
            # other entry corresponds to the full 200-epoch protocol.
            for epochs_verb, d in arch_info.items():
                if epochs_verb == 'less':
                    epochs = 12
                else:
                    epochs = 200
                arch_json = parse_arch_str(d['arch_str'])
                for (dataset, seed), r in d['all_results'].items():
                    sp = dataset_split[dataset.lower()]
                    # Final (last-epoch) statistics for this trial. Eval metrics
                    # are keyed '<split>@<epoch>' in the raw data.
                    data_parsed = {
                        'train_acc': r['train_acc1es'][epochs - 1],
                        'valid_acc': r['eval_acc1es']['{}@{}'.format(sp[1], epochs - 1)],
                        'test_acc': r['eval_acc1es']['{}@{}'.format(sp[2], epochs - 1)],
                        'ori_test_acc': r['eval_acc1es']['{}@{}'.format(sp[3], epochs - 1)],
                        'train_loss': r['train_losses'][epochs - 1],
                        'valid_loss': r['eval_losses']['{}@{}'.format(sp[1], epochs - 1)],
                        'test_loss': r['eval_losses']['{}@{}'.format(sp[2], epochs - 1)],
                        'ori_test_loss': r['eval_losses']['{}@{}'.format(sp[3], epochs - 1)],
                        'parameters': r['params'],
                        'flops': r['flop'],
                        'latency': r['latency'][0],
                        # train_times holds per-epoch duration; total = per-epoch * epochs.
                        'training_time': r['train_times'][epochs - 1] * epochs,
                        'valid_evaluation_time': r['eval_times']['{}@{}'.format(sp[1], epochs - 1)],
                        'test_evaluation_time': r['eval_times']['{}@{}'.format(sp[2], epochs - 1)],
                        'ori_test_evaluation_time': r['eval_times']['{}@{}'.format(sp[3], epochs - 1)],
                    }
                    config = Nb201TrialConfig.get(
                        (Nb201TrialConfig.num_epochs == epochs) &
                        (Nb201TrialConfig.arch == arch_json) &
                        (Nb201TrialConfig.dataset == dataset.lower())
                    )
                    trial_stats = Nb201TrialStats.create(config=config, seed=seed, **data_parsed)
                    intermediate_stats = []
                    for epoch in range(epochs):
                        # Intermediate metrics may be missing at some epochs;
                        # .get() returns None for absent entries.
                        data_parsed = {
                            'train_acc': r['train_acc1es'].get(epoch),
                            'valid_acc': r['eval_acc1es'].get('{}@{}'.format(sp[1], epoch)),
                            'test_acc': r['eval_acc1es'].get('{}@{}'.format(sp[2], epoch)),
                            'ori_test_acc': r['eval_acc1es'].get('{}@{}'.format(sp[3], epoch)),
                            'train_loss': r['train_losses'].get(epoch),
                            'valid_loss': r['eval_losses'].get('{}@{}'.format(sp[1], epoch)),
                            'test_loss': r['eval_losses'].get('{}@{}'.format(sp[2], epoch)),
                            'ori_test_loss': r['eval_losses'].get('{}@{}'.format(sp[3], epoch)),
                        }
                        # Skip epochs for which no metric at all was recorded.
                        if all([v is None for v in data_parsed.values()]):
                            continue
                        data_parsed.update(current_epoch=epoch + 1, trial=trial_stats)
                        intermediate_stats.append(data_parsed)
                    Nb201IntermediateStats.insert_many(intermediate_stats).execute(db)


if __name__ == '__main__':
    main()
import os
from peewee import CharField, FloatField, ForeignKeyField, IntegerField, Model
from playhouse.sqlite_ext import JSONField, SqliteExtDatabase
from nni.nas.benchmarks.constants import DATABASE_DIR
# SQLite database file holding all NAS-Bench-201 tables, located under DATABASE_DIR.
db = SqliteExtDatabase(os.path.join(DATABASE_DIR, 'nasbench201.db'), autoconnect=True)
class Nb201TrialConfig(Model):
    """
    Trial config for NAS-Bench-201.

    Attributes
    ----------
    arch : dict
        A dict with keys ``0_1``, ``0_2``, ``0_3``, ``1_2``, ``1_3``, ``2_3``, each of which
        is an operator chosen from :const:`nni.nas.benchmark.nasbench201.NONE`,
        :const:`nni.nas.benchmark.nasbench201.SKIP_CONNECT`,
        :const:`nni.nas.benchmark.nasbench201.CONV_1X1`,
        :const:`nni.nas.benchmark.nasbench201.CONV_3X3` and :const:`nni.nas.benchmark.nasbench201.AVG_POOL_3X3`.
    num_epochs : int
        Number of epochs planned for this trial. Should be one of 12 and 200.
    num_channels: int
        Number of channels for initial convolution. 16 by default.
    num_cells: int
        Number of cells per stage. 5 by default.
    dataset: str
        Dataset used for training and evaluation. NAS-Bench-201 provides the following 4 options:
        ``cifar10-valid`` (training data is split into 25k for training and 25k for validation,
        validation data is used for test), ``cifar10`` (training data is used in training, validation
        data is split into 5k for validation and 5k for testing), ``cifar100`` (same protocol as ``cifar10``),
        and ``imagenet16-120`` (a subset of 120 classes in ImageNet, downscaled to 16x16, using training data
        for training, 6k images from validation set for validation and the other 6k for testing).
    """
    arch = JSONField(index=True)
    num_epochs = IntegerField(index=True)
    num_channels = IntegerField()
    num_cells = IntegerField()
    dataset = CharField(max_length=20, index=True, choices=[
        'cifar10-valid',  # 25k train + 25k valid + 10k test
        'cifar10',  # 50k train + 5k valid + 5k test
        'cifar100',  # 50k train + 5k valid + 5k test
        'imagenet16-120',
    ])

    class Meta:
        database = db
class Nb201TrialStats(Model):
    """
    Computation statistics for NAS-Bench-201. Each corresponds to one trial.

    Attributes
    ----------
    config : Nb201TrialConfig
        Setup for this trial data.
    seed : int
        Random seed selected, for reproduction.
    train_acc : float
        Final accuracy on training data, ranging from 0 to 100.
    valid_acc : float
        Final accuracy on validation data, ranging from 0 to 100.
    test_acc : float
        Final accuracy on test data, ranging from 0 to 100.
    ori_test_acc : float
        Test accuracy on original validation set (10k for CIFAR and 12k for Imagenet16-120),
        ranging from 0 to 100.
    train_loss : float or None
        Final cross entropy loss on training data. Note that loss could be NaN, in which case
        this attributed will be None.
    valid_loss : float or None
        Final cross entropy loss on validation data.
    test_loss : float or None
        Final cross entropy loss on test data.
    ori_test_loss : float or None
        Final cross entropy loss on original validation set.
    parameters : float
        Number of trainable parameters in million.
    latency : float
        Latency in seconds.
    flops : float
        FLOPs in million.
    training_time : float
        Duration of training in seconds.
    valid_evaluation_time : float
        Time elapsed to evaluate on validation set.
    test_evaluation_time : float
        Time elapsed to evaluate on test set.
    ori_test_evaluation_time : float
        Time elapsed to evaluate on original test set.
    """
    config = ForeignKeyField(Nb201TrialConfig, backref='trial_stats', index=True)
    seed = IntegerField()
    train_acc = FloatField()
    valid_acc = FloatField()
    test_acc = FloatField()
    ori_test_acc = FloatField()  # test accuracy of the original test set
    train_loss = FloatField(null=True)  # possibly nan
    valid_loss = FloatField(null=True)
    test_loss = FloatField(null=True)
    ori_test_loss = FloatField(null=True)
    parameters = FloatField()  # parameters in million
    # NOTE(review): docstring above says latency is in seconds while this comment
    # previously said milliseconds; the stored value is r['latency'][0] from the
    # raw benchmark -- confirm the unit against the NAS-Bench-201 API.
    latency = FloatField()
    flops = FloatField()  # flops in million
    training_time = FloatField()
    valid_evaluation_time = FloatField()
    test_evaluation_time = FloatField()
    ori_test_evaluation_time = FloatField()

    class Meta:
        database = db
class Nb201IntermediateStats(Model):
    """
    Intermediate statistics for NAS-Bench-201.

    Attributes
    ----------
    trial : Nb201TrialStats
        Corresponding trial.
    current_epoch : int
        Elapsed epochs.
    train_acc : float
        Current accuracy on training data, ranging from 0 to 100.
    valid_acc : float
        Current accuracy on validation data, ranging from 0 to 100.
    test_acc : float
        Current accuracy on test data, ranging from 0 to 100.
    ori_test_acc : float
        Test accuracy on original validation set (10k for CIFAR and 12k for Imagenet16-120),
        ranging from 0 to 100.
    train_loss : float or None
        Current cross entropy loss on training data.
    valid_loss : float or None
        Current cross entropy loss on validation data.
    test_loss : float or None
        Current cross entropy loss on test data.
    ori_test_loss : float or None
        Current cross entropy loss on original validation set.
    """
    trial = ForeignKeyField(Nb201TrialStats, backref='intermediates', index=True)
    current_epoch = IntegerField(index=True)
    # All metrics are nullable: not every metric is recorded at every epoch.
    train_acc = FloatField(null=True)
    valid_acc = FloatField(null=True)
    test_acc = FloatField(null=True)
    ori_test_acc = FloatField(null=True)
    train_loss = FloatField(null=True)
    valid_loss = FloatField(null=True)
    test_loss = FloatField(null=True)
    ori_test_loss = FloatField(null=True)

    class Meta:
        database = db
import functools
from peewee import fn
from playhouse.shortcuts import model_to_dict
from .model import Nb201TrialStats, Nb201TrialConfig
def query_nb201_trial_stats(arch, num_epochs, dataset, reduction=None):
    """
    Query trial stats of NAS-Bench-201 given conditions.

    Parameters
    ----------
    arch : dict or None
        If a dict, it is in the format that is described in
        :class:`nni.nas.benchmark.nasbench201.Nb201TrialConfig`. Only trial stats
        matched will be returned. If none, architecture will be a wildcard.
    num_epochs : int or None
        If int, matching results will be returned. Otherwise a wildcard.
    dataset : str or None
        If specified, can be one of the dataset available in :class:`nni.nas.benchmark.nasbench201.Nb201TrialConfig`.
        Otherwise a wildcard.
    reduction : str or None
        If 'none' or None, all trial stats will be returned directly.
        If 'mean', fields in trial stats will be averaged given the same trial config.

    Returns
    -------
    generator of dict
        A generator of :class:`nni.nas.benchmark.nasbench201.Nb201TrialStats` objects,
        where each of them has been converted into a dict.
    """
    # The string 'none' is accepted as an alias for "no reduction".
    if reduction == 'none':
        reduction = None
    if reduction == 'mean':
        # Average every numeric column over trials that share one config.
        fields = [
            fn.AVG(getattr(Nb201TrialStats, name)).alias(name)
            for name in Nb201TrialStats._meta.sorted_field_names
            if name not in ('id', 'config', 'seed')
        ]
    elif reduction is None:
        fields = [Nb201TrialStats]
    else:
        raise ValueError('Unsupported reduction: \'%s\'' % reduction)
    query = Nb201TrialStats.select(*fields, Nb201TrialConfig).join(Nb201TrialConfig)
    # Collect the optional filters, then AND them into a single WHERE clause.
    filters = []
    if arch is not None:
        filters.append(Nb201TrialConfig.arch == arch)
    if num_epochs is not None:
        filters.append(Nb201TrialConfig.num_epochs == num_epochs)
    if dataset is not None:
        filters.append(Nb201TrialConfig.dataset == dataset)
    if filters:
        combined = functools.reduce(lambda lhs, rhs: lhs & rhs, filters)
        query = query.where(combined)
    if reduction is not None:
        query = query.group_by(Nb201TrialStats.config)
    for row in query:
        yield model_to_dict(row)
from .constants import *
from .model import NdsTrialConfig, NdsTrialStats, NdsIntermediateStats
from .query import query_nds_trial_stats
# Canonical operator names used by the NDS cell-based search spaces.
NONE = 'none'
SKIP_CONNECT = 'skip_connect'
# Pooling operators.
AVG_POOL_3X3 = 'avg_pool_3x3'
MAX_POOL_3X3 = 'max_pool_3x3'
MAX_POOL_5X5 = 'max_pool_5x5'
MAX_POOL_7X7 = 'max_pool_7x7'
# Plain convolutions.
CONV_1X1 = 'conv_1x1'
CONV_3X3 = 'conv_3x3'
# Factorized convolutions (e.g., 3x1 followed by 1x3).
CONV_3X1_1X3 = 'conv_3x1_1x3'
CONV_7X1_1X7 = 'conv_7x1_1x7'
# Dilated and separable convolutions.
DIL_CONV_3X3 = 'dil_conv_3x3'
DIL_CONV_5X5 = 'dil_conv_5x5'
SEP_CONV_3X3 = 'sep_conv_3x3'
SEP_CONV_5X5 = 'sep_conv_5x5'
SEP_CONV_7X7 = 'sep_conv_7x7'
DIL_SEP_CONV_3X3 = 'dil_sep_conv_3x3'
import json
import argparse
import os
import numpy as np
import tqdm
from .model import db, NdsTrialConfig, NdsTrialStats, NdsIntermediateStats
def inject_item(db, item, proposer, dataset, generator):
    """Insert one NDS trial (config, final stats and per-epoch stats) into the database.

    Parameters
    ----------
    db
        The peewee database the intermediate stats are inserted into.
    item : dict
        One record from an NDS sweep json file.
    proposer : str
        Lower-cased proposer name derived from the sweep file name.
    dataset : str
        ``'cifar10'`` or ``'imagenet'``.
    generator : str
        ``'random'``, ``'fix_w_d'`` or ``'tune_lr_wd'``.
    """
    if 'genotype' in item['net']:
        # NASNet-style cell search space: the genotype lists (op, input) pairs,
        # two consecutive entries per node.
        model_family = 'nas_cell'
        num_nodes_normal = len(item['net']['genotype']['normal']) // 2
        num_nodes_reduce = len(item['net']['genotype']['reduce']) // 2
        model_spec = {
            'num_nodes_normal': num_nodes_normal,
            'num_nodes_reduce': num_nodes_reduce,
            'depth': item['net']['depth'],
            'width': item['net']['width'],
            'aux': item['net']['aux'],
            'drop_prob': item['net']['drop_prob'],
        }
        cell_spec = {}
        # BUGFIX: iterate each cell type over its OWN node count. The previous
        # version used num_nodes_normal for the reduce cell as well, which is
        # only correct when both cells happen to have the same number of nodes.
        for cell_type, num_nodes in [('normal', num_nodes_normal), ('reduce', num_nodes_reduce)]:
            for i in range(num_nodes):
                for j, label in enumerate(['x', 'y']):
                    cell_spec['{}_{}_op_{}'.format(cell_type, i, label)] = \
                        item['net']['genotype'][cell_type][i * 2 + j][0]
                    cell_spec['{}_{}_input_{}'.format(cell_type, i, label)] = \
                        item['net']['genotype'][cell_type][i * 2 + j][1]
            cell_spec['{}_concat'.format(cell_type)] = item['net']['genotype']['{}_concat'.format(cell_type)]
    else:
        # Non-cell families are distinguished by their block type prefix.
        if item['net']['block_type'].startswith('res_bottleneck'):
            model_family = 'residual_bottleneck'
        elif item['net']['block_type'].startswith('res_basic'):
            model_family = 'residual_basic'
        elif item['net']['block_type'].startswith('double_plain'):
            model_family = 'vanilla'
        else:
            raise ValueError('Unrecognized block type')
        # Keep only truthy spec entries; 'block_type' is encoded in model_family.
        model_spec = {k: v for k, v in item['net'].items() if v and k != 'block_type'}
        cell_spec = {}
    # get_or_create: several trials (different seeds) may share one config.
    trial_config, _ = NdsTrialConfig.get_or_create(
        model_family=model_family,
        model_spec=model_spec,
        cell_spec=cell_spec,
        proposer=proposer,
        base_lr=item['optim']['base_lr'],
        weight_decay=item['optim']['wd'],
        num_epochs=item['optim']['max_ep'],
        dataset=dataset,
        generator=generator
    )
    assert len(item['train_ep_top1']) == len(item['test_ep_top1']) == trial_config.num_epochs
    # Raw data stores top-1 ERROR; convert to accuracy via 100 - error.
    trial = NdsTrialStats.create(
        config=trial_config,
        seed=item['rng_seed'],
        final_train_acc=100 - item['train_ep_top1'][-1],
        final_train_loss=item['train_ep_loss'][-1],
        final_test_acc=100 - item['test_ep_top1'][-1],
        best_train_acc=100 - min(item['train_ep_top1']),
        # nanmin: losses may contain NaN epochs.
        best_train_loss=np.nanmin(item['train_ep_loss']).item(),
        best_test_acc=100 - min(item['test_ep_top1']),
        parameters=item['params'] / 1e6,
        flops=item['flops'] / 1e6,
        iter_time=item['iter_time']
    )
    intermediate_stats = []
    for i in range(trial_config.num_epochs):
        intermediate_stats.append({
            'trial': trial,
            'current_epoch': i + 1,
            'train_loss': item['train_ep_loss'][i],
            'train_acc': 100 - item['train_ep_top1'][i],
            'test_acc': 100 - item['test_ep_top1'][i]
        })
    NdsIntermediateStats.insert_many(intermediate_stats).execute(db)
def main():
    """Convert the extracted NDS sweep json files into the local SQLite database."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', help='Path to extracted NDS data dir.')
    args = parser.parse_args()
    # Sweep files shipped with NDS. The file name encodes the proposer,
    # the generator variant ('fix-w-d' / 'lr-wd') and the dataset ('_in' = ImageNet).
    sweep_list = [
        'Amoeba.json',
        'Amoeba_in.json',
        'DARTS.json',
        'DARTS_fix-w-d.json',
        'DARTS_in.json',
        'DARTS_lr-wd.json',
        'DARTS_lr-wd_in.json',
        'ENAS.json',
        'ENAS_fix-w-d.json',
        'ENAS_in.json',
        'NASNet.json',
        'NASNet_in.json',
        'PNAS.json',
        'PNAS_fix-w-d.json',
        'PNAS_in.json',
        'ResNeXt-A.json',
        'ResNeXt-A_in.json',
        'ResNeXt-B.json',
        'ResNeXt-B_in.json',
        'ResNet-B.json',
        'ResNet.json',
        'ResNet_lr-wd.json',
        'ResNet_lr-wd_in.json',
        'ResNet_reruns.json',
        'ResNet_rng1.json',
        'ResNet_rng2.json',
        'ResNet_rng3.json',
        'Vanilla.json',
        'Vanilla_lr-wd.json',
        'Vanilla_lr-wd_in.json',
        'Vanilla_reruns.json',
        'Vanilla_rng1.json',
        'Vanilla_rng2.json',
        'Vanilla_rng3.json'
    ]
    with db:
        db.create_tables([NdsTrialConfig, NdsTrialStats, NdsIntermediateStats])
        for json_idx, json_file in enumerate(sweep_list, start=1):
            # Infer the generator variant from the file name.
            if 'fix-w-d' in json_file:
                generator = 'fix_w_d'
            elif 'lr-wd' in json_file:
                generator = 'tune_lr_wd'
            else:
                generator = 'random'
            # '_in' suffix marks ImageNet sweeps; everything else is CIFAR-10.
            if '_in' in json_file:
                dataset = 'imagenet'
            else:
                dataset = 'cifar10'
            # e.g., 'DARTS_fix-w-d.json' -> proposer 'darts'.
            proposer = json_file.split(".")[0].split("_")[0].lower()
            with open(os.path.join(args.input_dir, json_file), 'r') as f:
                data = json.load(f)
            # Some sweeps split their results into 'top' and 'mid' groups.
            if 'top' in data and 'mid' in data:
                for t in tqdm.tqdm(data['top'],
                                   desc='[{}/{}] Processing {} (top)'.format(json_idx, len(sweep_list), json_file)):
                    inject_item(db, t, proposer, dataset, generator)
                for t in tqdm.tqdm(data['mid'],
                                   desc='[{}/{}] Processing {} (mid)'.format(json_idx, len(sweep_list), json_file)):
                    inject_item(db, t, proposer, dataset, generator)
            else:
                for job in tqdm.tqdm(data,
                                     desc='[{}/{}] Processing {}'.format(json_idx, len(sweep_list), json_file)):
                    inject_item(db, job, proposer, dataset, generator)


if __name__ == '__main__':
    main()
import os
from peewee import CharField, FloatField, ForeignKeyField, IntegerField, Model
from playhouse.sqlite_ext import JSONField, SqliteExtDatabase
from nni.nas.benchmarks.constants import DATABASE_DIR
# SQLite database file holding all NDS tables, located under DATABASE_DIR.
db = SqliteExtDatabase(os.path.join(DATABASE_DIR, 'nds.db'), autoconnect=True)
class NdsTrialConfig(Model):
    """
    Trial config for NDS.

    Attributes
    ----------
    model_family : str
        Could be ``nas_cell``, ``residual_bottleneck``, ``residual_basic`` or ``vanilla``.
    model_spec : dict
        If ``model_family`` is ``nas_cell``, it contains ``num_nodes_normal``, ``num_nodes_reduce``, ``depth``,
        ``width``, ``aux`` and ``drop_prob``. If ``model_family`` is ``residual_bottleneck``, it contains ``bot_muls``,
        ``ds`` (depths), ``num_gs`` (number of groups) and ``ss`` (strides). If ``model_family`` is ``residual_basic`` or
        ``vanilla``, it contains ``ds``, ``ss`` and ``ws``.
    cell_spec : dict
        If ``model_family`` is not ``nas_cell`` it will be an empty dict. Otherwise, it specifies
        ``<normal/reduce>_<i>_<op/input>_<x/y>``, where i ranges from 0 to ``num_nodes_<normal/reduce> - 1``.
        If it is an ``op``, the value is chosen from the constants specified previously like :const:`nni.nas.benchmark.nds.CONV_1X1`.
        If it is i's ``input``, the value ranges from 0 to ``i + 1``, as ``nas_cell`` uses previous two nodes as inputs, and
        node 0 is actually the second node. Refer to NASNet paper for details. Finally, another two key-value pairs
        ``normal_concat`` and ``reduce_concat`` specify which nodes are eventually concatenated into output.
    dataset : str
        Dataset used. Could be ``cifar10`` or ``imagenet``.
    generator : str
        Can be one of ``random``, which generates configurations at random while keeping learning rate and weight decay
        fixed; ``fix_w_d``, which further keeps ``width`` and ``depth`` fixed (only applicable for ``nas_cell``); or
        ``tune_lr_wd``, which further tunes learning rate and weight decay.
    proposer : str
        Paper that proposed the distribution for random sampling. Available proposers include ``nasnet``, ``darts``, ``enas``,
        ``pnas``, ``amoeba``, ``vanilla``, ``resnext-a``, ``resnext-b``, ``resnet``, ``resnet-b`` (ResNet with bottleneck).
        See NDS paper for details.
    base_lr : float
        Initial learning rate.
    weight_decay : float
        L2 weight decay applied on weights.
    num_epochs : int
        Number of epochs scheduled, during which learning rate will decay to 0 following cosine annealing.
    """
    model_family = CharField(max_length=20, index=True, choices=[
        'nas_cell',
        'residual_bottleneck',
        'residual_basic',
        'vanilla',
    ])
    model_spec = JSONField(index=True)
    cell_spec = JSONField(index=True, null=True)  # empty/absent for non-cell families
    dataset = CharField(max_length=15, index=True, choices=['cifar10', 'imagenet'])
    generator = CharField(max_length=15, index=True, choices=[
        'random',
        'fix_w_d',
        'tune_lr_wd',
    ])
    proposer = CharField(max_length=15, index=True)
    base_lr = FloatField()
    weight_decay = FloatField()
    num_epochs = IntegerField()

    class Meta:
        database = db
class NdsTrialStats(Model):
    """
    Computation statistics for NDS. Each corresponds to one trial.

    Attributes
    ----------
    config : NdsTrialConfig
        Corresponding config for trial.
    seed : int
        Random seed selected, for reproduction.
    final_train_acc : float
        Final accuracy on training data, ranging from 0 to 100.
    final_train_loss : float or None
        Final cross entropy loss on training data. Could be NaN (None).
    final_test_acc : float
        Final accuracy on test data, ranging from 0 to 100.
    best_train_acc : float
        Best accuracy on training data, ranging from 0 to 100.
    best_train_loss : float or None
        Best cross entropy loss on training data. Could be NaN (None).
    best_test_acc : float
        Best accuracy on test data, ranging from 0 to 100.
    parameters : float
        Number of trainable parameters in million.
    flops : float
        FLOPs in million.
    iter_time : float
        Seconds elapsed for each iteration.
    """
    config = ForeignKeyField(NdsTrialConfig, backref='trial_stats', index=True)
    seed = IntegerField()
    final_train_acc = FloatField()
    final_train_loss = FloatField(null=True)  # possibly nan in raw data
    final_test_acc = FloatField()
    best_train_acc = FloatField()
    best_train_loss = FloatField(null=True)
    best_test_acc = FloatField()
    parameters = FloatField()  # parameters in million
    flops = FloatField()  # flops in million
    iter_time = FloatField()

    class Meta:
        database = db
class NdsIntermediateStats(Model):
    """
    Intermediate statistics for NDS.

    Attributes
    ----------
    trial : NdsTrialStats
        Corresponding trial.
    current_epoch : int
        Elapsed epochs.
    train_loss : float or None
        Current cross entropy loss on training data. Can be NaN (None).
    train_acc : float
        Current accuracy on training data, ranging from 0 to 100.
    test_acc : float
        Current accuracy on test data, ranging from 0 to 100.
    """
    trial = ForeignKeyField(NdsTrialStats, backref='intermediates', index=True)
    current_epoch = IntegerField(index=True)
    train_loss = FloatField(null=True)  # possibly nan in raw data
    train_acc = FloatField()
    test_acc = FloatField()

    class Meta:
        database = db
import functools
from peewee import fn
from playhouse.shortcuts import model_to_dict
from .model import NdsTrialStats, NdsTrialConfig
def query_nds_trial_stats(model_family, proposer, generator, model_spec, cell_spec, dataset,
                          num_epochs=None, reduction=None):
    """
    Query trial stats of NDS given conditions.

    Parameters
    ----------
    model_family : str or None
        If str, can be one of the model families available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`.
        Otherwise a wildcard.
    proposer : str or None
        If str, can be one of the proposers available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`. Otherwise a wildcard.
    generator : str or None
        If str, can be one of the generators available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`. Otherwise a wildcard.
    model_spec : dict or None
        If specified, can be one of the model spec available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`.
        Otherwise a wildcard.
    cell_spec : dict or None
        If specified, can be one of the cell spec available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`.
        Otherwise a wildcard.
    dataset : str or None
        If str, can be one of the datasets available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`. Otherwise a wildcard.
    num_epochs : int or None
        If int, matching results will be returned. Otherwise a wildcard.
    reduction : str or None
        If 'none' or None, all trial stats will be returned directly.
        If 'mean', fields in trial stats will be averaged given the same trial config.

    Returns
    -------
    generator of dict
        A generator of :class:`nni.nas.benchmark.nds.NdsTrialStats` objects,
        where each of them has been converted into a dict.
    """
    fields = []
    # 'none' is an alias for no reduction at all.
    if reduction == 'none':
        reduction = None
    if reduction == 'mean':
        # Average every numeric column over trials sharing the same config.
        for field_name in NdsTrialStats._meta.sorted_field_names:
            if field_name not in ['id', 'config', 'seed']:
                fields.append(fn.AVG(getattr(NdsTrialStats, field_name)).alias(field_name))
    elif reduction is None:
        fields.append(NdsTrialStats)
    else:
        raise ValueError('Unsupported reduction: \'%s\'' % reduction)
    query = NdsTrialStats.select(*fields, NdsTrialConfig).join(NdsTrialConfig)
    conditions = []
    # Map each optional filter explicitly instead of probing locals(), which is
    # fragile (silently breaks if a parameter is renamed) and non-idiomatic.
    optional_filters = {
        'model_family': model_family,
        'proposer': proposer,
        'generator': generator,
        'model_spec': model_spec,
        'cell_spec': cell_spec,
        'dataset': dataset,
        'num_epochs': num_epochs,
    }
    for field_name, value in optional_filters.items():
        if value is not None:
            conditions.append(getattr(NdsTrialConfig, field_name) == value)
    if conditions:
        query = query.where(functools.reduce(lambda a, b: a & b, conditions))
    if reduction is not None:
        query = query.group_by(NdsTrialStats.config)
    for k in query:
        yield model_to_dict(k)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment