Unverified Commit 77dac12b authored by QuanluZhang's avatar QuanluZhang Committed by GitHub
Browse files

Merge pull request #3023 from microsoft/v1.9

[do not squash!] merge v1.9 back to master
parents c2e69672 98a72a1e
......@@ -31,7 +31,7 @@ class Nb201TrialConfig(Model):
Dataset used for training and evaluation. NAS-Bench-201 provides the following 4 options:
``cifar10-valid`` (training data is split into 25k for training and 25k for validation,
validation data is used for test), ``cifar10`` (training data is used in training, validation
data is split into 25k for validation and 25k for testing), ``cifar100`` (same protocol as ``cifar10``),
data is split into 5k for validation and 5k for testing), ``cifar100`` (same protocol as ``cifar10``),
and ``imagenet16-120`` (a subset of 120 classes in ImageNet, downscaled to 16x16, using training data
for training, 6k images from validation set for validation and the other 6k for testing).
"""
......
......@@ -13,21 +13,29 @@ from .mutator import EnasMutator
logger = logging.getLogger(__name__)
log_frequency = 100
entropy_weight = 0.0001
skip_weight = 0.8
baseline_decay = 0.999
child_steps = 500
mutator_lr = 0.00035
mutator_steps = 50
mutator_steps_aggregate = 20
aux_weight = 0.4
test_arc_per_epoch = 1
class EnasTrainer:
def __init__(self, model, loss, metrics, reward_function, optimizer, batch_size, num_epochs,
dataset_train, dataset_valid):
def __init__(
self,
model,
loss,
metrics,
reward_function,
optimizer,
batch_size,
num_epochs,
dataset_train,
dataset_valid,
log_frequency=100,
entropy_weight=0.0001,
skip_weight=0.8,
baseline_decay=0.999,
child_steps=500,
mutator_lr=0.00035,
mutator_steps=50,
mutator_steps_aggregate=20,
aux_weight=0.4,
test_arc_per_epoch=1,
):
self.model = model
self.loss = loss
self.metrics = metrics
......@@ -42,11 +50,21 @@ class EnasTrainer:
self.valid_set = tf.data.Dataset.from_tensor_slices((x[split:], y[split:]))
self.test_set = tf.data.Dataset.from_tensor_slices(dataset_valid)
self.mutator = EnasMutator(model)
self.mutator_optim = Adam(learning_rate=mutator_lr)
self.log_frequency = log_frequency
self.entropy_weight = entropy_weight
self.skip_weight = skip_weight
self.baseline_decay = baseline_decay
self.child_steps = child_steps
self.mutator_lr = mutator_lr
self.mutator_steps = mutator_steps
self.mutator_steps_aggregate = mutator_steps_aggregate
self.aux_weight = aux_weight
self.test_arc_per_epoch = test_arc_per_epoch
self.baseline = 0.
self.mutator = EnasMutator(model)
self.mutator_optim = Adam(learning_rate=self.mutator_lr)
self.baseline = 0.0
def train(self, validate=True):
for epoch in range(self.num_epochs):
......@@ -58,14 +76,13 @@ class EnasTrainer:
def validate(self):
self.validate_one_epoch(-1)
def train_one_epoch(self, epoch):
train_loader, valid_loader = self._create_train_loader()
# Sample model and train
meters = AverageMeterGroup()
for step in range(1, child_steps + 1):
for step in range(1, self.child_steps + 1):
x, y = next(train_loader)
self.mutator.reset()
......@@ -75,64 +92,88 @@ class EnasTrainer:
logits, aux_logits = logits
aux_loss = self.loss(aux_logits, y)
else:
aux_loss = 0.
aux_loss = 0.0
metrics = self.metrics(y, logits)
loss = self.loss(y, logits) + aux_weight * aux_loss
loss = self.loss(y, logits) + self.aux_weight * aux_loss
grads = tape.gradient(loss, self.model.trainable_weights)
grads = fill_zero_grads(grads, self.model.trainable_weights)
grads, _ = tf.clip_by_global_norm(grads, 5.0)
self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))
metrics['loss'] = tf.reduce_mean(loss).numpy()
metrics["loss"] = tf.reduce_mean(loss).numpy()
meters.update(metrics)
if log_frequency and step % log_frequency == 0:
logger.info("Model Epoch [%d/%d] Step [%d/%d] %s", epoch + 1,
self.num_epochs, step, child_steps, meters)
if self.log_frequency and step % self.log_frequency == 0:
logger.info(
"Model Epoch [%d/%d] Step [%d/%d] %s",
epoch + 1,
self.num_epochs,
step,
self.child_steps,
meters,
)
# Train sampler (mutator)
meters = AverageMeterGroup()
for mutator_step in range(1, mutator_steps + 1):
for mutator_step in range(1, self.mutator_steps + 1):
grads_list = []
for step in range(1, mutator_steps_aggregate + 1):
for step in range(1, self.mutator_steps_aggregate + 1):
with tf.GradientTape() as tape:
x, y = next(valid_loader)
self.mutator.reset()
logits = self.model(x, training=False)
metrics = self.metrics(y, logits)
reward = self.reward_function(y, logits) + entropy_weight * self.mutator.sample_entropy
self.baseline = self.baseline * baseline_decay + reward * (1 - baseline_decay)
reward = (
self.reward_function(y, logits)
+ self.entropy_weight * self.mutator.sample_entropy
)
self.baseline = self.baseline * self.baseline_decay + reward * (
1 - self.baseline_decay
)
loss = self.mutator.sample_log_prob * (reward - self.baseline)
loss += skip_weight * self.mutator.sample_skip_penalty
meters.update({
'reward': reward,
'loss': tf.reduce_mean(loss).numpy(),
'ent': self.mutator.sample_entropy.numpy(),
'log_prob': self.mutator.sample_log_prob.numpy(),
'baseline': self.baseline,
'skip': self.mutator.sample_skip_penalty,
})
cur_step = step + (mutator_step - 1) * mutator_steps_aggregate
if log_frequency and cur_step % log_frequency == 0:
logger.info("RL Epoch [%d/%d] Step [%d/%d] [%d/%d] %s", epoch + 1, self.num_epochs,
mutator_step, mutator_steps, step, mutator_steps_aggregate,
meters)
loss += self.skip_weight * self.mutator.sample_skip_penalty
meters.update(
{
"reward": reward,
"loss": tf.reduce_mean(loss).numpy(),
"ent": self.mutator.sample_entropy.numpy(),
"log_prob": self.mutator.sample_log_prob.numpy(),
"baseline": self.baseline,
"skip": self.mutator.sample_skip_penalty,
}
)
cur_step = step + (mutator_step - 1) * self.mutator_steps_aggregate
if self.log_frequency and cur_step % self.log_frequency == 0:
logger.info(
"RL Epoch [%d/%d] Step [%d/%d] [%d/%d] %s",
epoch + 1,
self.num_epochs,
mutator_step,
self.mutator_steps,
step,
self.mutator_steps_aggregate,
meters,
)
grads = tape.gradient(loss, self.mutator.trainable_weights)
grads = fill_zero_grads(grads, self.mutator.trainable_weights)
grads_list.append(grads)
total_grads = [tf.math.add_n(weight_grads) for weight_grads in zip(*grads_list)]
total_grads = [
tf.math.add_n(weight_grads) for weight_grads in zip(*grads_list)
]
total_grads, _ = tf.clip_by_global_norm(total_grads, 5.0)
self.mutator_optim.apply_gradients(zip(total_grads, self.mutator.trainable_weights))
self.mutator_optim.apply_gradients(
zip(total_grads, self.mutator.trainable_weights)
)
def validate_one_epoch(self, epoch):
test_loader = self._create_validate_loader()
for arc_id in range(test_arc_per_epoch):
for arc_id in range(self.test_arc_per_epoch):
meters = AverageMeterGroup()
for x, y in test_loader:
self.mutator.reset()
......@@ -141,13 +182,17 @@ class EnasTrainer:
logits, _ = logits
metrics = self.metrics(y, logits)
loss = self.loss(y, logits)
metrics['loss'] = tf.reduce_mean(loss).numpy()
metrics["loss"] = tf.reduce_mean(loss).numpy()
meters.update(metrics)
logger.info("Test Epoch [%d/%d] Arc [%d/%d] Summary %s",
epoch + 1, self.num_epochs, arc_id + 1, test_arc_per_epoch,
meters.summary())
logger.info(
"Test Epoch [%d/%d] Arc [%d/%d] Summary %s",
epoch + 1,
self.num_epochs,
arc_id + 1,
self.test_arc_per_epoch,
meters.summary(),
)
def _create_train_loader(self):
train_set = self.train_set.shuffle(1000000).repeat().batch(self.batch_size)
......
......@@ -66,7 +66,12 @@ class Mutator(BaseMutator):
if reduction_type == 'mean':
return sum(tensor_list) / len(tensor_list)
if reduction_type == 'concat':
return tf.concat(tensor_list, axis=0)
image_data_format = tf.keras.backend.image_data_format()
if image_data_format == "channels_first":
axis = 0
else:
axis = -1
return tf.concat(tensor_list, axis=axis)
raise ValueError('Unrecognized reduction policy: "{}'.format(reduction_type))
def _get_decision(self, mutable):
......
......@@ -60,7 +60,10 @@ class Customize extends React.Component<CustomizeProps, CustomizeState> {
Object.keys(customized).map(item => {
if (item !== 'tag') {
// unified data type
if (typeof copyTrialParameter[item] === 'number' && typeof customized[item] === 'string') {
if (
(typeof copyTrialParameter[item] === 'number' && typeof customized[item] === 'string') ||
(typeof copyTrialParameter[item] === 'boolean' && typeof customized[item] === 'string')
) {
customized[item] = JSON.parse(customized[item]);
}
if (searchSpace[item] === undefined) {
......
......@@ -28,9 +28,9 @@ export const Command1 = (): any => {
}
return (
<div className='command basic'>
<div className='basic'>
<div>
<p>Training platform</p>
<p className='command'>Training platform</p>
<div className='nowrap'>{EXPERIMENT.profile.params.trainingServicePlatform}</div>
<p className='lineMargin'>{title}</p>
<div className='nowrap'>{builtinName}</div>
......
......@@ -11,13 +11,18 @@ export const Command2 = (): any => {
if (clusterMetaData !== undefined) {
for (const item of clusterMetaData) {
if (item.key === 'command') {
trialCommand = item.value;
trialCommand = item.value as string;
}
if (item.key === 'trial_config') {
if (typeof item.value === 'object' && 'command' in item.value) {
trialCommand = item.value.command as string;
}
}
}
}
return (
<div className='command basic'>
<p>Log directory</p>
<div className='basic'>
<p className='command'>Log directory</p>
<div className='nowrap'>
<TooltipHost
content={EXPERIMENT.profile.logDir || 'unknown'}
......
......@@ -142,6 +142,19 @@ export const EditExperimentParam = (): any => {
showPencil();
setUnit(maxDurationUnit);
}
function convertUnit(val: string): string {
if (val === 'd') {
return 'day';
} else if (val === 'h') {
return 'hour';
} else if (val === 'm') {
return 'min';
} else {
return val;
}
}
return (
<AppContext.Consumer>
{(values): React.ReactNode => {
......@@ -164,7 +177,7 @@ export const EditExperimentParam = (): any => {
onChange={setInputVal}
/>
{isShowPencil && title === 'Max duration' && (
<span>{values.maxDurationUnit}</span>
<span>{convertUnit(values.maxDurationUnit)}</span>
)}
{!isShowPencil && title === 'Max duration' && (
<Dropdown
......
......@@ -19,9 +19,9 @@ const entriesOption = [
];
const durationUnit = [
{ key: 'm', text: 'm' },
{ key: 'h', text: 'h' },
{ key: 'd', text: 'd' }
{ key: 'm', text: 'min' },
{ key: 'h', text: 'hour' },
{ key: 'd', text: 'day' }
];
export { itemStyle1, itemStyleSucceed, itemStyle2, entriesOption, durationUnit };
......@@ -63,13 +63,12 @@ class SuccessTable extends React.Component<SuccessTableProps, SuccessTableState>
}
tooltipStr = (
<div>
<p>The experiment is running, please wait for the final metric patiently.</p>
<div className='link'>
You could also find status of trial job with <span>{DETAILTABS}</span> button.
</div>
</div>
<React.Fragment>
The experiment is running, please wait for the final metric patiently. You could also find status of trial
job with <span>{DETAILTABS}</span> button.
</React.Fragment>
);
columns = [
{
name: 'Trial No.',
......@@ -154,6 +153,7 @@ class SuccessTable extends React.Component<SuccessTableProps, SuccessTableState>
render(): React.ReactNode {
const { columns, source } = this.state;
const isNoneData = source.length === 0 ? true : false;
return (
<div id='succTable'>
<DetailsList
......
......@@ -18,8 +18,8 @@ export const TrialConfigButton = (): any => {
return (
<React.Fragment>
<Stack className='config'>
<DefaultButton text='Config' onClick={showTrialConfigpPanel} />
<DefaultButton text='Search space' onClick={showSearchSpacePanel} />
<DefaultButton text='Config' onClick={showTrialConfigpPanel} />
</Stack>
{isShowConfigPanel && <TrialConfigPanel hideConfigPanel={hideConfigPanel} activeTab={activeTab} />}
</React.Fragment>
......
......@@ -28,6 +28,7 @@ import '../../static/style/pagination.scss';
import '../../static/style/search.scss';
import '../../static/style/table.scss';
import '../../static/style/tableStatus.css';
import '../../static/style/overview/overviewTitle.scss';
import { blocked, copy, LineChart, tableListIcon } from '../buttons/Icon';
import ChangeColumnComponent from '../modals/ChangeColumnComponent';
import Compare from '../modals/Compare';
......@@ -249,7 +250,7 @@ class TableList extends React.Component<TableListProps, TableListState> {
{
key: '_expand',
name: '',
onRender: (item, index): any => {
onRender: (item): any => {
return (
<Icon
aria-hidden={true}
......@@ -269,8 +270,9 @@ class TableList extends React.Component<TableListProps, TableListState> {
} else {
this._expandedTrialIds.delete(newItem.id);
}
const newItems = [...this.state.displayedItems];
newItems[index as number] = newItem;
const newItems = this.state.displayedItems.map(item =>
item.id === newItem.id ? newItem : item
);
this.setState({
displayedItems: newItems
});
......@@ -296,17 +298,16 @@ class TableList extends React.Component<TableListProps, TableListState> {
// FIXME: default metric is hacked as latestAccuracy currently
continue;
}
const lengths = tableItems.map(item => `${item[k]}`.length);
const avgLengths = lengths.reduce((a, b) => a + b) / lengths.length;
const columnTitle = _inferColumnTitle(k);
const columnWidth = Math.max(columnTitle.length, avgLengths);
// TODO: add blacklist
// 0.85: tableWidth / screen
const widths = window.innerWidth * 0.85;
columns.push({
name: columnTitle,
key: k,
fieldName: k,
minWidth: columnWidth * 13,
maxWidth: columnWidth * 18,
minWidth: widths * 0.12,
maxWidth: widths * 0.19,
isResizable: true,
onColumnClick: this._onColumnClick.bind(this),
...(k === 'status' && {
......@@ -370,8 +371,8 @@ class TableList extends React.Component<TableListProps, TableListState> {
name: 'Operation',
key: '_operation',
fieldName: 'operation',
minWidth: 160,
maxWidth: 200,
minWidth: 150,
maxWidth: 160,
isResizable: true,
className: 'detail-table',
onRender: this._renderOperationColumn.bind(this)
......
......@@ -184,10 +184,14 @@ interface ExperimentParams {
};
clusterMetaData?: {
key: string;
value: string;
value: string | ClusterItem;
}[];
}
interface ClusterItem {
command?: string;
}
interface ExperimentProfile {
params: ExperimentParams;
id: string;
......
.command {
p {
.overviewCommand1,
.overviewCommand2 {
.command {
margin-top: 0;
font-weight: normal;
}
}
.basic {
.lineMargin {
margin-top: 20px;
font-weight: normal;
}
}
......@@ -4,11 +4,17 @@
z-index: 1000;
.ms-Button--default {
padding: 0;
margin: 0 0 15px 0;
padding: 0 8px;
margin: 0 0 12px 0;
border: none;
box-shadow: 0 3px 3px rgba(0, 0, 0, 0.08);
border-radius: 18px 0 0 18px;
border: 1px solid #ccc;
color: #0573bc;
font-size: 12px;
text-align: left;
.ms-Button-label {
font-weight: normal;
}
}
.ms-Button--default:hover {
......
......@@ -57,7 +57,7 @@ $seriesIconMargin: 8px;
}
&-dropdown {
width: 51px;
width: 48px;
display: inline-block;
position: relative;
top: 13px;
......
$iconPaddingVal: 20px;
.panelTitle {
img {
height: 23px;
/* (38 - 22 ) / 2 */
margin-top: 8px;
/* icon right */
padding: 0 $iconPaddingVal 0 0;
}
span {
font-size: 18px;
font-weight: 600;
......
......@@ -3,7 +3,6 @@
color: #0573bc;
font-size: 20px;
font-weight: 600;
margin-top: 5px;
.status-text {
display: inline-block;
......
......@@ -5,17 +5,15 @@
position: relative;
.succTable-tooltip {
width: 90%;
position: absolute;
top: 40%;
left: 5%;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
.link {
margin-left: 15px;
a {
font-weight: 500;
color: blue;
}
a {
font-weight: 500;
color: blue;
}
}
......
......@@ -8913,10 +8913,10 @@ node-fetch-npm@^2.0.2:
json-parse-better-errors "^1.0.0"
safe-buffer "^5.1.1"
 
node-forge@0.9.0:
version "0.9.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.9.0.tgz#d624050edbb44874adca12bb9a52ec63cb782579"
integrity sha512-7ASaDa3pD+lJ3WvXFsxekJQelBKRpne+GOVbLbtHYdd7pFspyeuJHnWfLplGf3SwKGbfs/aYl5V/JCIaHVUKKQ==
node-forge@^0.10.0:
version "0.10.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
 
node-gyp@^3.8.0:
version "3.8.0"
......
......@@ -95,6 +95,8 @@ pai:
containerNFSMountPath:
paiStorageConfigName:
remote:
remoteConfig:
reuse: false
machineList:
- ip:
passwd:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment