Unverified Commit 77dac12b authored by QuanluZhang's avatar QuanluZhang Committed by GitHub
Browse files

Merge pull request #3023 from microsoft/v1.9

[do not squash!] merge v1.9 back to master
parents c2e69672 98a72a1e
......@@ -31,7 +31,7 @@ class Nb201TrialConfig(Model):
Dataset used for training and evaluation. NAS-Bench-201 provides the following 4 options:
``cifar10-valid`` (training data is split into 25k for training and 25k for validation,
validation data is used for test), ``cifar10`` (training data is used in training, validation
data is split into 25k for validation and 25k for testing), ``cifar100`` (same protocol as ``cifar10``),
data is split into 5k for validation and 5k for testing), ``cifar100`` (same protocol as ``cifar10``),
and ``imagenet16-120`` (a subset of 120 classes in ImageNet, downscaled to 16x16, using training data
for training, 6k images from validation set for validation and the other 6k for testing).
"""
......
......@@ -13,21 +13,29 @@ from .mutator import EnasMutator
logger = logging.getLogger(__name__)
log_frequency = 100
entropy_weight = 0.0001
skip_weight = 0.8
baseline_decay = 0.999
child_steps = 500
mutator_lr = 0.00035
mutator_steps = 50
mutator_steps_aggregate = 20
aux_weight = 0.4
test_arc_per_epoch = 1
class EnasTrainer:
def __init__(self, model, loss, metrics, reward_function, optimizer, batch_size, num_epochs,
dataset_train, dataset_valid):
def __init__(
self,
model,
loss,
metrics,
reward_function,
optimizer,
batch_size,
num_epochs,
dataset_train,
dataset_valid,
log_frequency=100,
entropy_weight=0.0001,
skip_weight=0.8,
baseline_decay=0.999,
child_steps=500,
mutator_lr=0.00035,
mutator_steps=50,
mutator_steps_aggregate=20,
aux_weight=0.4,
test_arc_per_epoch=1,
):
self.model = model
self.loss = loss
self.metrics = metrics
......@@ -42,11 +50,21 @@ class EnasTrainer:
self.valid_set = tf.data.Dataset.from_tensor_slices((x[split:], y[split:]))
self.test_set = tf.data.Dataset.from_tensor_slices(dataset_valid)
self.mutator = EnasMutator(model)
self.mutator_optim = Adam(learning_rate=mutator_lr)
self.log_frequency = log_frequency
self.entropy_weight = entropy_weight
self.skip_weight = skip_weight
self.baseline_decay = baseline_decay
self.child_steps = child_steps
self.mutator_lr = mutator_lr
self.mutator_steps = mutator_steps
self.mutator_steps_aggregate = mutator_steps_aggregate
self.aux_weight = aux_weight
self.test_arc_per_epoch = test_arc_per_epoch
self.baseline = 0.
self.mutator = EnasMutator(model)
self.mutator_optim = Adam(learning_rate=self.mutator_lr)
self.baseline = 0.0
def train(self, validate=True):
for epoch in range(self.num_epochs):
......@@ -58,14 +76,13 @@ class EnasTrainer:
def validate(self):
self.validate_one_epoch(-1)
def train_one_epoch(self, epoch):
train_loader, valid_loader = self._create_train_loader()
# Sample model and train
meters = AverageMeterGroup()
for step in range(1, child_steps + 1):
for step in range(1, self.child_steps + 1):
x, y = next(train_loader)
self.mutator.reset()
......@@ -75,64 +92,88 @@ class EnasTrainer:
logits, aux_logits = logits
aux_loss = self.loss(aux_logits, y)
else:
aux_loss = 0.
aux_loss = 0.0
metrics = self.metrics(y, logits)
loss = self.loss(y, logits) + aux_weight * aux_loss
loss = self.loss(y, logits) + self.aux_weight * aux_loss
grads = tape.gradient(loss, self.model.trainable_weights)
grads = fill_zero_grads(grads, self.model.trainable_weights)
grads, _ = tf.clip_by_global_norm(grads, 5.0)
self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))
metrics['loss'] = tf.reduce_mean(loss).numpy()
metrics["loss"] = tf.reduce_mean(loss).numpy()
meters.update(metrics)
if log_frequency and step % log_frequency == 0:
logger.info("Model Epoch [%d/%d] Step [%d/%d] %s", epoch + 1,
self.num_epochs, step, child_steps, meters)
if self.log_frequency and step % self.log_frequency == 0:
logger.info(
"Model Epoch [%d/%d] Step [%d/%d] %s",
epoch + 1,
self.num_epochs,
step,
self.child_steps,
meters,
)
# Train sampler (mutator)
meters = AverageMeterGroup()
for mutator_step in range(1, mutator_steps + 1):
for mutator_step in range(1, self.mutator_steps + 1):
grads_list = []
for step in range(1, mutator_steps_aggregate + 1):
for step in range(1, self.mutator_steps_aggregate + 1):
with tf.GradientTape() as tape:
x, y = next(valid_loader)
self.mutator.reset()
logits = self.model(x, training=False)
metrics = self.metrics(y, logits)
reward = self.reward_function(y, logits) + entropy_weight * self.mutator.sample_entropy
self.baseline = self.baseline * baseline_decay + reward * (1 - baseline_decay)
reward = (
self.reward_function(y, logits)
+ self.entropy_weight * self.mutator.sample_entropy
)
self.baseline = self.baseline * self.baseline_decay + reward * (
1 - self.baseline_decay
)
loss = self.mutator.sample_log_prob * (reward - self.baseline)
loss += skip_weight * self.mutator.sample_skip_penalty
meters.update({
'reward': reward,
'loss': tf.reduce_mean(loss).numpy(),
'ent': self.mutator.sample_entropy.numpy(),
'log_prob': self.mutator.sample_log_prob.numpy(),
'baseline': self.baseline,
'skip': self.mutator.sample_skip_penalty,
})
cur_step = step + (mutator_step - 1) * mutator_steps_aggregate
if log_frequency and cur_step % log_frequency == 0:
logger.info("RL Epoch [%d/%d] Step [%d/%d] [%d/%d] %s", epoch + 1, self.num_epochs,
mutator_step, mutator_steps, step, mutator_steps_aggregate,
meters)
loss += self.skip_weight * self.mutator.sample_skip_penalty
meters.update(
{
"reward": reward,
"loss": tf.reduce_mean(loss).numpy(),
"ent": self.mutator.sample_entropy.numpy(),
"log_prob": self.mutator.sample_log_prob.numpy(),
"baseline": self.baseline,
"skip": self.mutator.sample_skip_penalty,
}
)
cur_step = step + (mutator_step - 1) * self.mutator_steps_aggregate
if self.log_frequency and cur_step % self.log_frequency == 0:
logger.info(
"RL Epoch [%d/%d] Step [%d/%d] [%d/%d] %s",
epoch + 1,
self.num_epochs,
mutator_step,
self.mutator_steps,
step,
self.mutator_steps_aggregate,
meters,
)
grads = tape.gradient(loss, self.mutator.trainable_weights)
grads = fill_zero_grads(grads, self.mutator.trainable_weights)
grads_list.append(grads)
total_grads = [tf.math.add_n(weight_grads) for weight_grads in zip(*grads_list)]
total_grads = [
tf.math.add_n(weight_grads) for weight_grads in zip(*grads_list)
]
total_grads, _ = tf.clip_by_global_norm(total_grads, 5.0)
self.mutator_optim.apply_gradients(zip(total_grads, self.mutator.trainable_weights))
self.mutator_optim.apply_gradients(
zip(total_grads, self.mutator.trainable_weights)
)
def validate_one_epoch(self, epoch):
test_loader = self._create_validate_loader()
for arc_id in range(test_arc_per_epoch):
for arc_id in range(self.test_arc_per_epoch):
meters = AverageMeterGroup()
for x, y in test_loader:
self.mutator.reset()
......@@ -141,13 +182,17 @@ class EnasTrainer:
logits, _ = logits
metrics = self.metrics(y, logits)
loss = self.loss(y, logits)
metrics['loss'] = tf.reduce_mean(loss).numpy()
metrics["loss"] = tf.reduce_mean(loss).numpy()
meters.update(metrics)
logger.info("Test Epoch [%d/%d] Arc [%d/%d] Summary %s",
epoch + 1, self.num_epochs, arc_id + 1, test_arc_per_epoch,
meters.summary())
logger.info(
"Test Epoch [%d/%d] Arc [%d/%d] Summary %s",
epoch + 1,
self.num_epochs,
arc_id + 1,
self.test_arc_per_epoch,
meters.summary(),
)
def _create_train_loader(self):
train_set = self.train_set.shuffle(1000000).repeat().batch(self.batch_size)
......
......@@ -66,7 +66,12 @@ class Mutator(BaseMutator):
if reduction_type == 'mean':
return sum(tensor_list) / len(tensor_list)
if reduction_type == 'concat':
return tf.concat(tensor_list, axis=0)
image_data_format = tf.keras.backend.image_data_format()
if image_data_format == "channels_first":
axis = 0
else:
axis = -1
return tf.concat(tensor_list, axis=axis)
raise ValueError('Unrecognized reduction policy: "{}'.format(reduction_type))
def _get_decision(self, mutable):
......
......@@ -60,7 +60,10 @@ class Customize extends React.Component<CustomizeProps, CustomizeState> {
Object.keys(customized).map(item => {
if (item !== 'tag') {
// unified data type
if (typeof copyTrialParameter[item] === 'number' && typeof customized[item] === 'string') {
if (
(typeof copyTrialParameter[item] === 'number' && typeof customized[item] === 'string') ||
(typeof copyTrialParameter[item] === 'boolean' && typeof customized[item] === 'string')
) {
customized[item] = JSON.parse(customized[item]);
}
if (searchSpace[item] === undefined) {
......
......@@ -28,9 +28,9 @@ export const Command1 = (): any => {
}
return (
<div className='command basic'>
<div className='basic'>
<div>
<p>Training platform</p>
<p className='command'>Training platform</p>
<div className='nowrap'>{EXPERIMENT.profile.params.trainingServicePlatform}</div>
<p className='lineMargin'>{title}</p>
<div className='nowrap'>{builtinName}</div>
......
......@@ -11,13 +11,18 @@ export const Command2 = (): any => {
if (clusterMetaData !== undefined) {
for (const item of clusterMetaData) {
if (item.key === 'command') {
trialCommand = item.value;
trialCommand = item.value as string;
}
if (item.key === 'trial_config') {
if (typeof item.value === 'object' && 'command' in item.value) {
trialCommand = item.value.command as string;
}
}
}
}
return (
<div className='command basic'>
<p>Log directory</p>
<div className='basic'>
<p className='command'>Log directory</p>
<div className='nowrap'>
<TooltipHost
content={EXPERIMENT.profile.logDir || 'unknown'}
......
......@@ -142,6 +142,19 @@ export const EditExperimentParam = (): any => {
showPencil();
setUnit(maxDurationUnit);
}
function convertUnit(val: string): string {
if (val === 'd') {
return 'day';
} else if (val === 'h') {
return 'hour';
} else if (val === 'm') {
return 'min';
} else {
return val;
}
}
return (
<AppContext.Consumer>
{(values): React.ReactNode => {
......@@ -164,7 +177,7 @@ export const EditExperimentParam = (): any => {
onChange={setInputVal}
/>
{isShowPencil && title === 'Max duration' && (
<span>{values.maxDurationUnit}</span>
<span>{convertUnit(values.maxDurationUnit)}</span>
)}
{!isShowPencil && title === 'Max duration' && (
<Dropdown
......
......@@ -19,9 +19,9 @@ const entriesOption = [
];
const durationUnit = [
{ key: 'm', text: 'm' },
{ key: 'h', text: 'h' },
{ key: 'd', text: 'd' }
{ key: 'm', text: 'min' },
{ key: 'h', text: 'hour' },
{ key: 'd', text: 'day' }
];
export { itemStyle1, itemStyleSucceed, itemStyle2, entriesOption, durationUnit };
......@@ -63,13 +63,12 @@ class SuccessTable extends React.Component<SuccessTableProps, SuccessTableState>
}
tooltipStr = (
<div>
<p>The experiment is running, please wait for the final metric patiently.</p>
<div className='link'>
You could also find status of trial job with <span>{DETAILTABS}</span> button.
</div>
</div>
<React.Fragment>
The experiment is running, please wait for the final metric patiently. You could also find status of trial
job with <span>{DETAILTABS}</span> button.
</React.Fragment>
);
columns = [
{
name: 'Trial No.',
......@@ -154,6 +153,7 @@ class SuccessTable extends React.Component<SuccessTableProps, SuccessTableState>
render(): React.ReactNode {
const { columns, source } = this.state;
const isNoneData = source.length === 0 ? true : false;
return (
<div id='succTable'>
<DetailsList
......
......@@ -18,8 +18,8 @@ export const TrialConfigButton = (): any => {
return (
<React.Fragment>
<Stack className='config'>
<DefaultButton text='Config' onClick={showTrialConfigpPanel} />
<DefaultButton text='Search space' onClick={showSearchSpacePanel} />
<DefaultButton text='Config' onClick={showTrialConfigpPanel} />
</Stack>
{isShowConfigPanel && <TrialConfigPanel hideConfigPanel={hideConfigPanel} activeTab={activeTab} />}
</React.Fragment>
......
......@@ -28,6 +28,7 @@ import '../../static/style/pagination.scss';
import '../../static/style/search.scss';
import '../../static/style/table.scss';
import '../../static/style/tableStatus.css';
import '../../static/style/overview/overviewTitle.scss';
import { blocked, copy, LineChart, tableListIcon } from '../buttons/Icon';
import ChangeColumnComponent from '../modals/ChangeColumnComponent';
import Compare from '../modals/Compare';
......@@ -249,7 +250,7 @@ class TableList extends React.Component<TableListProps, TableListState> {
{
key: '_expand',
name: '',
onRender: (item, index): any => {
onRender: (item): any => {
return (
<Icon
aria-hidden={true}
......@@ -269,8 +270,9 @@ class TableList extends React.Component<TableListProps, TableListState> {
} else {
this._expandedTrialIds.delete(newItem.id);
}
const newItems = [...this.state.displayedItems];
newItems[index as number] = newItem;
const newItems = this.state.displayedItems.map(item =>
item.id === newItem.id ? newItem : item
);
this.setState({
displayedItems: newItems
});
......@@ -296,17 +298,16 @@ class TableList extends React.Component<TableListProps, TableListState> {
// FIXME: default metric is hacked as latestAccuracy currently
continue;
}
const lengths = tableItems.map(item => `${item[k]}`.length);
const avgLengths = lengths.reduce((a, b) => a + b) / lengths.length;
const columnTitle = _inferColumnTitle(k);
const columnWidth = Math.max(columnTitle.length, avgLengths);
// TODO: add blacklist
// 0.85: tableWidth / screen
const widths = window.innerWidth * 0.85;
columns.push({
name: columnTitle,
key: k,
fieldName: k,
minWidth: columnWidth * 13,
maxWidth: columnWidth * 18,
minWidth: widths * 0.12,
maxWidth: widths * 0.19,
isResizable: true,
onColumnClick: this._onColumnClick.bind(this),
...(k === 'status' && {
......@@ -370,8 +371,8 @@ class TableList extends React.Component<TableListProps, TableListState> {
name: 'Operation',
key: '_operation',
fieldName: 'operation',
minWidth: 160,
maxWidth: 200,
minWidth: 150,
maxWidth: 160,
isResizable: true,
className: 'detail-table',
onRender: this._renderOperationColumn.bind(this)
......
......@@ -184,10 +184,14 @@ interface ExperimentParams {
};
clusterMetaData?: {
key: string;
value: string;
value: string | ClusterItem;
}[];
}
interface ClusterItem {
command?: string;
}
interface ExperimentProfile {
params: ExperimentParams;
id: string;
......
.command {
p {
.overviewCommand1,
.overviewCommand2 {
.command {
margin-top: 0;
font-weight: normal;
}
}
.basic {
.lineMargin {
margin-top: 20px;
font-weight: normal;
}
}
......@@ -4,11 +4,17 @@
z-index: 1000;
.ms-Button--default {
padding: 0;
margin: 0 0 15px 0;
padding: 0 8px;
margin: 0 0 12px 0;
border: none;
box-shadow: 0 3px 3px rgba(0, 0, 0, 0.08);
border-radius: 18px 0 0 18px;
border: 1px solid #ccc;
color: #0573bc;
font-size: 12px;
text-align: left;
.ms-Button-label {
font-weight: normal;
}
}
.ms-Button--default:hover {
......
......@@ -57,7 +57,7 @@ $seriesIconMargin: 8px;
}
&-dropdown {
width: 51px;
width: 48px;
display: inline-block;
position: relative;
top: 13px;
......
$iconPaddingVal: 20px;
.panelTitle {
img {
height: 23px;
/* (38 - 22 ) / 2 */
margin-top: 8px;
/* icon right */
padding: 0 $iconPaddingVal 0 0;
}
span {
font-size: 18px;
font-weight: 600;
......
......@@ -3,7 +3,6 @@
color: #0573bc;
font-size: 20px;
font-weight: 600;
margin-top: 5px;
.status-text {
display: inline-block;
......
......@@ -5,17 +5,15 @@
position: relative;
.succTable-tooltip {
width: 90%;
position: absolute;
top: 40%;
left: 5%;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
.link {
margin-left: 15px;
a {
font-weight: 500;
color: blue;
}
a {
font-weight: 500;
color: blue;
}
}
......
......@@ -8913,10 +8913,10 @@ node-fetch-npm@^2.0.2:
json-parse-better-errors "^1.0.0"
safe-buffer "^5.1.1"
 
node-forge@0.9.0:
version "0.9.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.9.0.tgz#d624050edbb44874adca12bb9a52ec63cb782579"
integrity sha512-7ASaDa3pD+lJ3WvXFsxekJQelBKRpne+GOVbLbtHYdd7pFspyeuJHnWfLplGf3SwKGbfs/aYl5V/JCIaHVUKKQ==
node-forge@^0.10.0:
version "0.10.0"
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3"
integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA==
 
node-gyp@^3.8.0:
version "3.8.0"
......
......@@ -95,6 +95,8 @@ pai:
containerNFSMountPath:
paiStorageConfigName:
remote:
remoteConfig:
reuse: false
machineList:
- ip:
passwd:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment