"src/git@developer.sourcefind.cn:OpenDAS/tilelang.git" did not exist on "7e8d1f827369040b2c65bed65227a03333e018be"
Unverified Commit aa51e79c authored by QuanluZhang's avatar QuanluZhang Committed by GitHub
Browse files

support classic nas mode: each chosen arch as a separate trial job (#1775)

parent 6c1fe5c8
...@@ -22,6 +22,7 @@ nnictl support commands: ...@@ -22,6 +22,7 @@ nnictl support commands:
* [nnictl webui](#webui) * [nnictl webui](#webui)
* [nnictl tensorboard](#tensorboard) * [nnictl tensorboard](#tensorboard)
* [nnictl package](#package) * [nnictl package](#package)
* [nnictl ss_gen](#ss_gen)
* [nnictl --version](#version) * [nnictl --version](#version)
### Manage an experiment ### Manage an experiment
...@@ -733,6 +734,37 @@ Debug mode will disable version check function in Trialkeeper. ...@@ -733,6 +734,37 @@ Debug mode will disable version check function in Trialkeeper.
nnictl package show nnictl package show
``` ```
<a name="ss_gen"></a>
![](https://placehold.it/15/1589F0/000000?text=+) `Generate search space`
* __nnictl ss_gen__
* Description
Generate search space from user trial code which uses NNI NAS APIs.
* Usage
```bash
nnictl ss_gen [OPTIONS]
```
* Options
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|--trial_command| True| |The command of the trial code|
|--trial_dir| False| ./ |The directory of the trial code|
|--file| False| nni_auto_gen_search_space.json |The file for storing generated search space|
* Example
> Generate a search space
```bash
nnictl ss_gen --trial_command="python3 mnist.py" --trial_dir=./ --file=ss.json
```
<a name="version"></a> <a name="version"></a>
![](https://placehold.it/15/1589F0/000000?text=+) `Check NNI version` ![](https://placehold.it/15/1589F0/000000?text=+) `Check NNI version`
......
# Experiment config: classic NAS with a customized random tuner.
# Each architecture sampled by the tuner runs as a separate trial job.
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: local
#please use `nnictl ss_gen` to generate search space file first
searchSpacePath: <the_generated_search_space_path>
useAnnotation: False
tuner:
  # customized tuner loaded from source files rather than a builtin tuner
  codeDir: ../../tuners/random_nas_tuner
  classFileName: random_nas_tuner.py
  className: RandomNASTuner
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
# Experiment config: classic NAS driven by the builtin PPOTuner.
# Longer duration/trial budget since PPO needs many samples to learn.
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 100h
maxTrialNum: 1000
#choice: local, remote, pai
trainingServicePlatform: local
#please use `nnictl ss_gen` to generate search space file first
searchSpacePath: <the_generated_search_space_path>
useAnnotation: False
tuner:
  builtinTunerName: PPOTuner
  classArgs:
    optimize_mode: maximize
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
"""
A deep MNIST classifier using convolutional layers.
This file is a modification of the official pytorch mnist example:
https://github.com/pytorch/examples/blob/master/mnist/main.py
"""
import os
import argparse
import logging
import nni
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from nni.nas.pytorch.mutables import LayerChoice, InputChoice
from nni.nas.pytorch.classic_nas import get_and_apply_next_architecture
logger = logging.getLogger('mnist_AutoML')
class Net(nn.Module):
    """Small MNIST CNN with NNI NAS mutables.

    Two searchable choices are embedded: the kernel size of the first and
    middle convolutions (``LayerChoice``) and an optional skip connection
    around the middle convolution (``InputChoice``).
    """

    def __init__(self, hidden_size):
        super(Net, self).__init__()
        # first conv: pick between a 5x5 and a 3x3 kernel
        self.conv1 = LayerChoice(
            [nn.Conv2d(1, 20, 5, 1), nn.Conv2d(1, 20, 3, 1)],
            key='first_conv')
        # middle conv: padding chosen so spatial size is preserved either way
        self.mid_conv = LayerChoice(
            [nn.Conv2d(20, 20, 3, 1, padding=1),
             nn.Conv2d(20, 20, 5, 1, padding=2)],
            key='mid_conv')
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4 * 4 * 50, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 10)
        # searchable skip connection bypassing mid_conv (zero tensor == no skip)
        self.input_switch = InputChoice(n_candidates=2, n_chosen=1, key='skip')

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        before_mid = x
        x = F.relu(self.mid_conv(x))
        # candidate 0 is an all-zero tensor, candidate 1 is the skip input
        skip_x = self.input_switch([torch.zeros_like(before_mid), before_mid])
        x = x + skip_x
        x = F.max_pool2d(F.relu(self.conv2(x)), 2, 2)
        x = x.view(-1, 4 * 4 * 50)
        x = F.relu(self.fc1(x))
        return F.log_softmax(self.fc2(x), dim=1)
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    ``args`` is a dict of hyper-parameters; only ``log_interval`` is read here.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()
        # log every `log_interval` batches (batch 0 always logs)
        if batch_idx % args['log_interval'] == 0:
            seen = batch_idx * len(data)
            total = len(train_loader.dataset)
            logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, total,
                100. * batch_idx / len(train_loader), loss.item()))
def test(args, model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and return accuracy in percent."""
    model.eval()
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            total_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            num_correct += pred.eq(target.view_as(pred)).sum().item()
    dataset_size = len(test_loader.dataset)
    total_loss /= dataset_size
    accuracy = 100. * num_correct / dataset_size
    logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        total_loss, num_correct, dataset_size, accuracy))
    return accuracy
def main(args):
    """Run one NAS trial: build the model, apply the tuner's chosen
    architecture, then train and report results to NNI.

    ``args`` is a plain dict of hyper-parameters (see ``get_params``).
    """
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()
    torch.manual_seed(args['seed'])
    device = torch.device("cuda" if use_cuda else "cpu")
    loader_kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # NOTE: a per-trial dir (data_dir + nni.get_trial_id()) was considered
    # but a shared 'data' subdir is used so the download happens only once.
    data_dir = os.path.join(args['data_dir'], 'data')
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=True, download=True,
                       transform=mnist_transform),
        batch_size=args['batch_size'], shuffle=True, **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=False, transform=mnist_transform),
        batch_size=1000, shuffle=True, **loader_kwargs)

    model = Net(hidden_size=args['hidden_size']).to(device)
    # fetch the architecture chosen by the tuner and activate it in-place
    get_and_apply_next_architecture(model)
    optimizer = optim.SGD(model.parameters(), lr=args['lr'],
                          momentum=args['momentum'])

    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)
        if epoch == args['epochs']:
            # report final result
            nni.report_final_result(test_acc)
            logger.debug('Final result is %g', test_acc)
            logger.debug('Send final result done.')
        else:
            # report intermediate result
            nni.report_intermediate_result(test_acc)
            logger.debug('test accuracy %g', test_acc)
            logger.debug('Pipe send intermediate result done.')
def get_params():
    """Parse command-line hyper-parameters for the MNIST trial.

    Uses ``parse_known_args`` so extra, NNI-injected arguments are ignored.

    Returns
    -------
    argparse.Namespace
        the parsed training settings
    """
    arg_parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    # data / model shape
    arg_parser.add_argument("--data_dir", type=str,
                            default='/tmp/tensorflow/mnist/input_data', help="data directory")
    arg_parser.add_argument("--hidden_size", type=int, default=512, metavar='N',
                            help='hidden layer size (default: 512)')
    # optimization
    arg_parser.add_argument('--batch_size', type=int, default=64, metavar='N',
                            help='input batch size for training (default: 64)')
    arg_parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                            help='learning rate (default: 0.01)')
    arg_parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                            help='SGD momentum (default: 0.5)')
    arg_parser.add_argument('--epochs', type=int, default=10, metavar='N',
                            help='number of epochs to train (default: 10)')
    # environment / logging
    arg_parser.add_argument('--seed', type=int, default=1, metavar='S',
                            help='random seed (default: 1)')
    arg_parser.add_argument('--no_cuda', action='store_true', default=False,
                            help='disables CUDA training')
    arg_parser.add_argument('--log_interval', type=int, default=1000, metavar='N',
                            help='how many batches to wait before logging training status')
    parsed, _ = arg_parser.parse_known_args()
    return parsed
# Script entry point: parse CLI hyper-parameters and run a single NAS trial.
if __name__ == '__main__':
    try:
        params = vars(get_params())
        main(params)
    except Exception as exception:
        # log the full traceback to the trial log, then re-raise so the
        # trial job is marked as failed
        logger.exception(exception)
        raise
...@@ -6,36 +6,27 @@ from nni.tuner import Tuner ...@@ -6,36 +6,27 @@ from nni.tuner import Tuner
def random_archi_generator(nas_ss, random_state): def random_archi_generator(nas_ss, random_state):
'''random '''random
''' '''
chosen_archi = {} chosen_arch = {}
for block_name, block_value in nas_ss.items(): for key, val in nas_ss.items():
assert block_value['_type'] == "mutable_layer", \ assert val['_type'] in ['layer_choice', 'input_choice'], \
"Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'" "Random NAS Tuner only receives NAS search space whose _type is 'layer_choice' or 'input_choice'"
block = block_value['_value'] if val['_type'] == 'layer_choice':
tmp_block = {} choices = val['_value']
for layer_name, layer in block.items(): index = random_state.randint(len(choices))
tmp_layer = {} chosen_arch[key] = {'_value': choices[index], '_idx': index}
for key, value in layer.items(): elif val['_type'] == 'input_choice':
if key == 'layer_choice': choices = val['_value']['candidates']
index = random_state.randint(len(value)) n_chosen = val['_value']['n_chosen']
tmp_layer['chosen_layer'] = value[index] chosen = []
elif key == 'optional_inputs': idxs = []
tmp_layer['chosen_inputs'] = [] for _ in range(n_chosen):
if layer['optional_inputs']: index = random_state.randint(len(choices))
if isinstance(layer['optional_input_size'], int): chosen.append(choices[index])
choice_num = layer['optional_input_size'] idxs.append(index)
else: chosen_arch[key] = {'_value': chosen, '_idx': idxs}
choice_range = layer['optional_input_size'] else:
choice_num = random_state.randint(choice_range[0], choice_range[1] + 1) raise ValueError('Unknown key %s and value %s' % (key, val))
for _ in range(choice_num): return chosen_arch
index = random_state.randint(len(layer['optional_inputs']))
tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
elif key == 'optional_input_size':
pass
else:
raise ValueError('Unknown field %s in layer %s of block %s' % (key, layer_name, block_name))
tmp_block[layer_name] = tmp_layer
chosen_archi[block_name] = tmp_block
return chosen_archi
class RandomNASTuner(Tuner): class RandomNASTuner(Tuner):
......
from .mutator import get_and_apply_next_architecture
import os
import sys
import json
import logging
import torch
import nni
from nni.env_vars import trial_env_vars
from nni.nas.pytorch.base_mutator import BaseMutator
from nni.nas.pytorch.mutables import LayerChoice, InputChoice
logger = logging.getLogger(__name__)
def get_and_apply_next_architecture(model):
    """
    Wrapper of ClassicMutator to make it more meaningful,
    similar to ```get_next_parameter``` for HPO.

    Instantiating ClassicMutator mutates ``model`` in place (its constructor
    fetches the chosen architecture and hooks the mutables), so the instance
    itself does not need to be kept.

    Parameters
    ----------
    model : pytorch model
        user's model with search space (e.g., LayerChoice, InputChoice) embedded in it
    """
    ClassicMutator(model)
class ClassicMutator(BaseMutator):
    """
    Mutator that applies the architecture chosen by the tuner.

    It implements the forward functions of LayerChoice and InputChoice,
    to only activate the chosen candidate(s).
    """

    def __init__(self, model):
        """
        Generate search space based on ```model```.

        If env ```NNI_GEN_SEARCH_SPACE``` exists, this is in dry run mode for
        generating search space for the experiment: the search space is dumped
        to that path and the process exits.
        If not, there are still two modes: one is nni experiment mode where
        users use ```nnictl``` to start an experiment and the chosen
        architecture comes from the tuner; the other is standalone mode where
        users directly run the trial command, which chooses the first one(s)
        for each LayerChoice and InputChoice.

        Parameters
        ----------
        model : pytorch model
            user's model with search space (e.g., LayerChoice, InputChoice) embedded in it
        """
        super(ClassicMutator, self).__init__(model)
        self.chosen_arch = {}
        self.search_space = self._generate_search_space()
        if 'NNI_GEN_SEARCH_SPACE' in os.environ:
            # dry run for only generating search space
            self._dump_search_space(self.search_space, os.environ.get('NNI_GEN_SEARCH_SPACE'))
            sys.exit(0)
        # get chosen arch from tuner
        self.chosen_arch = nni.get_next_parameter()
        if not self.chosen_arch and trial_env_vars.NNI_PLATFORM is None:
            logger.warning('This is in standalone mode, the chosen are the first one(s)')
            self.chosen_arch = self._standalone_generate_chosen()
        self._validate_chosen_arch()

    def _validate_chosen_arch(self):
        # TODO: validate that self.chosen_arch is consistent with
        # self.search_space; currently a no-op placeholder
        pass

    def _standalone_generate_chosen(self):
        """
        Generate the chosen architecture for standalone mode,
        i.e., choose the first one(s) for LayerChoice and InputChoice.

        { key_name: {'_value': "conv1",
                     '_idx': 0} }

        { key_name: {'_value': ["in1"],
                     '_idx': [0]} }

        Returns
        -------
        dict
            the chosen architecture

        Raises
        ------
        ValueError
            if a search space entry has an unknown ``_type``
        """
        chosen_arch = {}
        for key, val in self.search_space.items():
            if val['_type'] == 'layer_choice':
                choices = val['_value']
                chosen_arch[key] = {'_value': choices[0], '_idx': 0}
            elif val['_type'] == 'input_choice':
                choices = val['_value']['candidates']
                n_chosen = val['_value']['n_chosen']
                # take the first n_chosen candidates, in order
                chosen_arch[key] = {'_value': choices[:n_chosen], '_idx': list(range(n_chosen))}
            else:
                raise ValueError('Unknown key %s and value %s' % (key, val))
        return chosen_arch

    def _generate_search_space(self):
        """
        Generate search space from mutables.

        Here is the search space format:

        { key_name: {'_type': 'layer_choice',
                     '_value': ["conv1", "conv2"]} }

        { key_name: {'_type': 'input_choice',
                     '_value': {'candidates': ["in1", "in2"],
                                'n_chosen': 1}} }

        Returns
        -------
        dict
            the generated search space

        Raises
        ------
        TypeError
            if the model contains a mutable that is neither LayerChoice
            nor InputChoice
        """
        search_space = {}
        for mutable in self.mutables:
            # for now we only generate flattened search space
            if isinstance(mutable, LayerChoice):
                key = mutable.key
                # candidates are identified by their repr() strings
                val = [repr(choice) for choice in mutable.choices]
                search_space[key] = {"_type": "layer_choice", "_value": val}
            elif isinstance(mutable, InputChoice):
                key = mutable.key
                search_space[key] = {"_type": "input_choice",
                                     "_value": {"candidates": mutable.choose_from,
                                                "n_chosen": mutable.n_chosen}}
            else:
                raise TypeError('Unsupported mutable type: %s.' % type(mutable))
        return search_space

    def _dump_search_space(self, search_space, file_path):
        # write the generated search space as JSON, to be picked up by
        # `nnictl ss_gen`
        with open(file_path, 'w') as ss_file:
            json.dump(search_space, ss_file)

    def _tensor_reduction(self, reduction_type, tensor_list):
        """
        Reduce the chosen candidate outputs according to ``reduction_type``.

        Parameters
        ----------
        reduction_type : str
            one of "none", "sum", "mean", "concat"
        tensor_list : list of torch.Tensor
            outputs of the chosen candidates

        Returns
        -------
        torch.Tensor or list of torch.Tensor or None
            the reduced tensor; the unmodified list for "none"; None when
            the list is empty

        Raises
        ------
        ValueError
            if ``reduction_type`` is not recognized
        """
        # BUG FIX: the original tested ``tensor_list == "none"``, which is
        # never true for a list, so the "none" policy fell through to the
        # ValueError below; the sentinel lives in ``reduction_type``.
        if reduction_type == "none":
            return tensor_list
        if not tensor_list:
            return None  # empty. return None for now
        if len(tensor_list) == 1:
            return tensor_list[0]
        if reduction_type == "sum":
            return sum(tensor_list)
        if reduction_type == "mean":
            return sum(tensor_list) / len(tensor_list)
        if reduction_type == "concat":
            return torch.cat(tensor_list, dim=1)
        raise ValueError("Unrecognized reduction policy: \"{}\"".format(reduction_type))

    def on_forward_layer_choice(self, mutable, *inputs):
        """
        Implement the forward of LayerChoice: run only the chosen op.

        Parameters
        ----------
        mutable: LayerChoice
        inputs: list of torch.Tensor

        Returns
        -------
        tuple
            return of the chosen op, the index of the chosen op
        """
        assert mutable.key in self.chosen_arch
        val = self.chosen_arch[mutable.key]
        assert isinstance(val, dict)
        idx = val['_idx']
        # sanity check: the recorded value must match the candidate at idx
        assert self.search_space[mutable.key]['_value'][idx] == val['_value']
        return mutable.choices[idx](*inputs), idx

    def on_forward_input_choice(self, mutable, tensor_list):
        """
        Implement the forward of InputChoice: keep only the chosen inputs.

        Parameters
        ----------
        mutable: InputChoice
        tensor_list: list of torch.Tensor

        Returns
        -------
        tuple of torch.Tensor and list
            reduced tensor, mask list (1 for chosen candidates, 0 otherwise)
        """
        assert mutable.key in self.chosen_arch
        val = self.chosen_arch[mutable.key]
        assert isinstance(val, dict)
        mask = [0 for _ in range(mutable.n_candidates)]
        out = []
        for i, idx in enumerate(val['_idx']):
            # check whether idx matches the chosen candidate name
            assert self.search_space[mutable.key]['_value']['candidates'][idx] == val['_value'][i]
            out.append(tensor_list[idx])
            mask[idx] = 1
        return self._tensor_reduction(mutable.reduction, out), mask
...@@ -351,74 +351,33 @@ class PPOTuner(Tuner): ...@@ -351,74 +351,33 @@ class PPOTuner(Tuner):
self.send_trial_callback = None self.send_trial_callback = None
logger.info('Finished PPOTuner initialization') logger.info('Finished PPOTuner initialization')
def _process_one_nas_space(self, block_name, block_space):
"""
Process nas space to determine observation space and action space
Parameters
----------
block_name : str
The name of the mutable block
block_space : dict
Search space of this mutable block
Returns
-------
actions_spaces : list
List of the space of each action
actions_to_config : list
The mapping from action to generated configuration
"""
actions_spaces = []
actions_to_config = []
block_arch_temp = {}
for l_name, layer in block_space.items():
chosen_layer_temp = {}
if len(layer['layer_choice']) > 1:
actions_spaces.append(layer['layer_choice'])
actions_to_config.append((block_name, l_name, 'chosen_layer'))
chosen_layer_temp['chosen_layer'] = None
else:
assert len(layer['layer_choice']) == 1
chosen_layer_temp['chosen_layer'] = layer['layer_choice'][0]
if layer['optional_input_size'] not in [0, 1, [0, 1]]:
raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the pecified one is %s'
% (layer['optional_input_size']))
if isinstance(layer['optional_input_size'], list):
actions_spaces.append(["None", *layer['optional_inputs']])
actions_to_config.append((block_name, l_name, 'chosen_inputs'))
chosen_layer_temp['chosen_inputs'] = None
elif layer['optional_input_size'] == 1:
actions_spaces.append(layer['optional_inputs'])
actions_to_config.append((block_name, l_name, 'chosen_inputs'))
chosen_layer_temp['chosen_inputs'] = None
elif layer['optional_input_size'] == 0:
chosen_layer_temp['chosen_inputs'] = []
else:
raise ValueError('invalid type and value of optional_input_size')
block_arch_temp[l_name] = chosen_layer_temp
self.chosen_arch_template[block_name] = block_arch_temp
return actions_spaces, actions_to_config
def _process_nas_space(self, search_space): def _process_nas_space(self, search_space):
"""
Process nas search space to get action/observation space
"""
actions_spaces = [] actions_spaces = []
actions_to_config = [] actions_to_config = []
for b_name, block in search_space.items(): for key, val in search_space.items():
if block['_type'] != 'mutable_layer': if val['_type'] == 'layer_choice':
raise ValueError('PPOTuner only accept mutable_layer type in search space, but the current one is %s'%(block['_type'])) actions_to_config.append((key, 'layer_choice'))
block = block['_value'] actions_spaces.append(val['_value'])
act, act_map = self._process_one_nas_space(b_name, block) self.chosen_arch_template[key] = None
actions_spaces.extend(act) elif val['_type'] == 'input_choice':
actions_to_config.extend(act_map) candidates = val['_value']['candidates']
n_chosen = val['_value']['n_chosen']
if n_chosen not in [0, 1, [0, 1]]:
raise ValueError('Optional_input_size can only be 0, 1, or [0, 1], but the pecified one is %s'
% (n_chosen))
if isinstance(n_chosen, list):
actions_to_config.append((key, 'input_choice'))
# FIXME: risk, candidates might also have None
actions_spaces.append(['None', *candidates])
self.chosen_arch_template[key] = None
elif n_chosen == 1:
actions_to_config.append((key, 'input_choice'))
actions_spaces.append(candidates)
self.chosen_arch_template[key] = None
elif n_chosen == 0:
self.chosen_arch_template[key] = []
else:
raise ValueError('Unsupported search space type: %s' % (val['_type']))
# calculate observation space # calculate observation space
dedup = {} dedup = {}
...@@ -428,7 +387,6 @@ class PPOTuner(Tuner): ...@@ -428,7 +387,6 @@ class PPOTuner(Tuner):
full_act_space = [act for act, _ in dedup.items()] full_act_space = [act for act, _ in dedup.items()]
assert len(full_act_space) == len(dedup) assert len(full_act_space) == len(dedup)
observation_space = len(full_act_space) observation_space = len(full_act_space)
nsteps = len(actions_spaces) nsteps = len(actions_spaces)
return actions_spaces, actions_to_config, full_act_space, observation_space, nsteps return actions_spaces, actions_to_config, full_act_space, observation_space, nsteps
...@@ -470,7 +428,7 @@ class PPOTuner(Tuner): ...@@ -470,7 +428,7 @@ class PPOTuner(Tuner):
Search space for NAS Search space for NAS
the format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html). the format could be referred to search space spec (https://nni.readthedocs.io/en/latest/Tutorial/SearchSpaceSpec.html).
""" """
logger.info('=== update search space %s', search_space) logger.info('update search space %s', search_space)
assert self.search_space is None assert self.search_space is None
self.search_space = search_space self.search_space = search_space
...@@ -496,16 +454,19 @@ class PPOTuner(Tuner): ...@@ -496,16 +454,19 @@ class PPOTuner(Tuner):
chosen_arch = copy.deepcopy(self.chosen_arch_template) chosen_arch = copy.deepcopy(self.chosen_arch_template)
for cnt, act in enumerate(actions): for cnt, act in enumerate(actions):
act_name = self.full_act_space[act] act_name = self.full_act_space[act]
(block_name, layer_name, key) = self.actions_to_config[cnt] (_key, _type) = self.actions_to_config[cnt]
if key == 'chosen_inputs': if _type == 'input_choice':
if act_name == 'None': if act_name == 'None':
chosen_arch[block_name][layer_name][key] = [] chosen_arch[_key] = {'_value': [], '_idx': []}
else: else:
chosen_arch[block_name][layer_name][key] = [act_name] candidates = self.search_space[_key]['_value']['candidates']
elif key == 'chosen_layer': idx = candidates.index(act_name)
chosen_arch[block_name][layer_name][key] = act_name chosen_arch[_key] = {'_value': [act_name], '_idx': [idx]}
elif _type == 'layer_choice':
idx = self.search_space[_key]['_value'].index(act_name)
chosen_arch[_key] = {'_value': act_name, '_idx': idx}
else: else:
raise ValueError('unrecognized key: {0}'.format(key)) raise ValueError('unrecognized key: {0}'.format(_type))
return chosen_arch return chosen_arch
def generate_multiple_parameters(self, parameter_id_list, **kwargs): def generate_multiple_parameters(self, parameter_id_list, **kwargs):
...@@ -561,6 +522,7 @@ class PPOTuner(Tuner): ...@@ -561,6 +522,7 @@ class PPOTuner(Tuner):
trial_info_idx, actions = self.trials_info.get_next() trial_info_idx, actions = self.trials_info.get_next()
if trial_info_idx is None: if trial_info_idx is None:
logger.debug('Credit added by one in parameters request')
self.credit += 1 self.credit += 1
self.param_ids.append(parameter_id) self.param_ids.append(parameter_id)
raise nni.NoMoreTrialError('no more parameters now.') raise nni.NoMoreTrialError('no more parameters now.')
...@@ -573,6 +535,7 @@ class PPOTuner(Tuner): ...@@ -573,6 +535,7 @@ class PPOTuner(Tuner):
""" """
Run a inference to generate next batch of configurations Run a inference to generate next batch of configurations
""" """
logger.debug('Start next round inference...')
self.finished_trials = 0 self.finished_trials = 0
self.model.compute_rewards(self.trials_info, self.trials_result) self.model.compute_rewards(self.trials_info, self.trials_result)
self.model.train(self.trials_info, self.inf_batch_size) self.model.train(self.trials_info, self.inf_batch_size)
...@@ -584,6 +547,7 @@ class PPOTuner(Tuner): ...@@ -584,6 +547,7 @@ class PPOTuner(Tuner):
mb_values, mb_neglogpacs, mb_values, mb_neglogpacs,
mb_dones, last_values, mb_dones, last_values,
self.inf_batch_size) self.inf_batch_size)
logger.debug('Next round inference complete.')
# check credit and submit new trials # check credit and submit new trials
for _ in range(self.credit): for _ in range(self.credit):
trial_info_idx, actions = self.trials_info.get_next() trial_info_idx, actions = self.trials_info.get_next()
...@@ -596,6 +560,7 @@ class PPOTuner(Tuner): ...@@ -596,6 +560,7 @@ class PPOTuner(Tuner):
new_config = self._actions_to_config(actions) new_config = self._actions_to_config(actions)
self.send_trial_callback(param_id, new_config) self.send_trial_callback(param_id, new_config)
self.credit -= 1 self.credit -= 1
logger.debug('Send new trial (%d, %s) for reducing credit', param_id, new_config)
def receive_trial_result(self, parameter_id, parameters, value, **kwargs): def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
""" """
...@@ -621,7 +586,10 @@ class PPOTuner(Tuner): ...@@ -621,7 +586,10 @@ class PPOTuner(Tuner):
self.trials_result[trial_info_idx] = value self.trials_result[trial_info_idx] = value
self.finished_trials += 1 self.finished_trials += 1
logger.debug('receive_trial_result, parameter_id %d, trial_info_idx %d, finished_trials %d, inf_batch_size %d',
parameter_id, trial_info_idx, self.finished_trials, self.inf_batch_size)
if self.finished_trials == self.inf_batch_size: if self.finished_trials == self.inf_batch_size:
logger.debug('Start next round inference in receive_trial_result')
self._next_round_inference() self._next_round_inference()
def trial_end(self, parameter_id, success, **kwargs): def trial_end(self, parameter_id, success, **kwargs):
...@@ -650,6 +618,7 @@ class PPOTuner(Tuner): ...@@ -650,6 +618,7 @@ class PPOTuner(Tuner):
self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0 self.trials_result[trial_info_idx] = (sum(values) / len(values)) if values else 0
self.finished_trials += 1 self.finished_trials += 1
if self.finished_trials == self.inf_batch_size: if self.finished_trials == self.inf_batch_size:
logger.debug('Start next round inference in trial_end')
self._next_round_inference() self._next_round_inference()
def import_data(self, data): def import_data(self, data):
......
...@@ -10,7 +10,8 @@ from .launcher import create_experiment, resume_experiment, view_experiment ...@@ -10,7 +10,8 @@ from .launcher import create_experiment, resume_experiment, view_experiment
from .updater import update_searchspace, update_concurrency, update_duration, update_trialnum, import_data from .updater import update_searchspace, update_concurrency, update_duration, update_trialnum, import_data
from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\ from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\
log_trial, experiment_clean, platform_clean, experiment_list, \ log_trial, experiment_clean, platform_clean, experiment_list, \
monitor_experiment, export_trials_data, trial_codegen, webui_url, get_config, log_stdout, log_stderr monitor_experiment, export_trials_data, trial_codegen, webui_url, \
get_config, log_stdout, log_stderr, search_space_auto_gen
from .package_management import package_install, package_show from .package_management import package_install, package_show
from .constants import DEFAULT_REST_PORT from .constants import DEFAULT_REST_PORT
from .tensorboard_utils import start_tensorboard, stop_tensorboard from .tensorboard_utils import start_tensorboard, stop_tensorboard
...@@ -38,6 +39,13 @@ def parse_args(): ...@@ -38,6 +39,13 @@ def parse_args():
# create subparsers for args with sub values # create subparsers for args with sub values
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()
# parse the command of auto generating search space
parser_start = subparsers.add_parser('ss_gen', help='automatically generate search space file from trial code')
parser_start.add_argument('--trial_command', '-t', required=True, dest='trial_command', help='the command for running trial code')
parser_start.add_argument('--trial_dir', '-d', default='./', dest='trial_dir', help='the directory for running the command')
parser_start.add_argument('--file', '-f', default='nni_auto_gen_search_space.json', dest='file', help='the path of search space file')
parser_start.set_defaults(func=search_space_auto_gen)
# parse start command # parse start command
parser_start = subparsers.add_parser('create', help='create a new experiment') parser_start = subparsers.add_parser('create', help='create a new experiment')
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file') parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
......
...@@ -9,6 +9,7 @@ import re ...@@ -9,6 +9,7 @@ import re
import shutil import shutil
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from subprocess import Popen
from pyhdfs import HdfsClient from pyhdfs import HdfsClient
from nni_annotation import expand_annotations from nni_annotation import expand_annotations
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
...@@ -675,3 +676,16 @@ def export_trials_data(args): ...@@ -675,3 +676,16 @@ def export_trials_data(args):
print_error('Export failed...') print_error('Export failed...')
else: else:
print_error('Restful server is not Running') print_error('Restful server is not Running')
def search_space_auto_gen(args):
    '''Dry-run the trial code to generate the NAS search space file.

    Runs ``args.trial_command`` in ``args.trial_dir`` with the env var
    NNI_GEN_SEARCH_SPACE set; the trial's ClassicMutator detects that
    variable, dumps the search space to the given path, and exits.
    '''
    trial_dir = os.path.expanduser(args.trial_dir)
    file_path = os.path.expanduser(args.file)
    # BUG FIX: abs_file_path was only assigned in the non-absolute branch,
    # so an absolute --file path raised NameError below.
    if os.path.isabs(file_path):
        abs_file_path = file_path
    else:
        abs_file_path = os.path.join(os.getcwd(), file_path)
    # NOTE(review): assert is stripped under `python -O`; consider an
    # explicit check with a user-facing error instead.
    assert os.path.exists(trial_dir)
    if os.path.exists(abs_file_path):
        print_warning('%s already exists, will be overwritten' % abs_file_path)
    print_normal('Dry run to generate search space...')
    # shell=True is required because trial_command is a full shell command line
    Popen(args.trial_command, cwd=trial_dir,
          env=dict(os.environ, NNI_GEN_SEARCH_SPACE=abs_file_path),
          shell=True).wait()
    print_normal('Dry run to generate search space, Done')
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment