For Metis, there are only about 300 trials because it runs slowly, due to the O(n^3) time complexity of its Gaussian Process.
## RocksDB Benchmark 'fillrandom' and 'readrandom'
### Problem Description
[DB_Bench](https://github.com/facebook/rocksdb/wiki/Benchmarking-tools) is the main tool used to benchmark [RocksDB](https://rocksdb.org/)'s performance. It has many hyperparameters to tune.
The performance of `DB_Bench` depends on the machine configuration and the installation method. We run `DB_Bench` on a Linux machine and install RocksDB as a shared library.
#### Machine configuration
```
RocksDB: version 6.1
CPU: 6 * Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz
CPUCache: 35840 KB
Keys: 16 bytes each
Values: 100 bytes each (50 bytes after compression)
Entries: 1000000
```
#### Storage performance
**Latency**: each IO request takes some time to complete; this is called the average latency. Several factors affect this time, including network connection quality and hard disk IO performance.
**IOPS**: **IO operations per second**, i.e. the number of _read or write operations_ that can be performed in one second.
**IO size**: **the size of each IO request**. Depending on the operating system and the application/service that needs disk access, a request reads or writes a certain amount of data at a time.
**Throughput (in MB/s) = average IO size × IOPS**
IOPS is related to online processing ability, so we use IOPS as the metric in our experiments.
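As a quick sanity check of the formula above (the numbers here are illustrative, not measured): a disk sustaining 10,000 IOPS at an average IO size of 4 KB delivers a throughput of about 10,000 × 4 KB = 40 MB/s.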
### Search Space
```json
{
    "max_background_compactions":{
        "_type":"quniform",
        "_value":[1,256,1]
    },
    "block_size":{
        "_type":"quniform",
        "_value":[1,500000,1]
    },
    "write_buffer_size":{
        "_type":"quniform",
        "_value":[1,130000000,1]
    },
    "max_write_buffer_number":{
        "_type":"quniform",
        "_value":[1,128,1]
    },
    "min_write_buffer_number_to_merge":{
        "_type":"quniform",
        "_value":[1,32,1]
    },
    "level0_file_num_compaction_trigger":{
        "_type":"quniform",
        "_value":[1,256,1]
    },
    "level0_slowdown_writes_trigger":{
        "_type":"quniform",
        "_value":[1,1024,1]
    },
    "level0_stop_writes_trigger":{
        "_type":"quniform",
        "_value":[1,1024,1]
    },
    "cache_size":{
        "_type":"quniform",
        "_value":[1,30000000,1]
    },
    "compaction_readahead_size":{
        "_type":"quniform",
        "_value":[1,30000000,1]
    },
    "new_table_reader_for_compaction_inputs":{
        "_type":"randint",
        "_value":[1]
    }
}
```
The search space is enormous (about 10^40 configurations), so we set the maximum number of trials to 100 to limit the computational resources.
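For reference, a `quniform` entry `[low, high, q]` draws a uniform sample between `low` and `high` and rounds it to a multiple of `q`. Below is a minimal sketch of a trial script for this search space; `nni.get_next_parameter()` and `nni.report_final_result()` are the standard NNI trial APIs, while the `db_bench` flag names (mirroring the parameter names above) and the output parsing are assumptions about `db_bench`'s interface.

```python
import re
import subprocess

import nni

# One sample from the search space above, e.g. {"block_size": 4096, ...}
params = nni.get_next_parameter()

# Assumption: db_bench exposes each tuned knob as a same-named flag;
# --num matches the 1,000,000 entries in the machine configuration
cmd = ['db_bench', '--benchmarks=fillrandom', '--num=1000000']
cmd += ['--{}={}'.format(name, int(value)) for name, value in params.items()]
output = subprocess.run(cmd, capture_output=True, text=True).stdout

# Assumption: db_bench prints '... N ops/sec;' on the benchmark result line
iops = float(re.search(r'([\d.]+) ops/sec', output).group(1))
nni.report_final_result(iops)  # IOPS is the metric being maximized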
### Results
#### 'fillrandom' Benchmark
| Model | Best IOPS (Repeat 1) | Best IOPS (Repeat 2) | Best IOPS (Repeat 3) |
| :---: | :------------------: | :------------------: | :------------------: |
Train and Compare NAS (Neural Architecture Search) models including Autokeras, DARTS, ENAS and NAO.
Their source code links are as follows:
...
To avoid over-fitting on **CIFAR-10**, we also compare the models on five other datasets: Fashion-MNIST, CIFAR-100, OUI-Adience-Age, ImageNet-10-1 (a subset of ImageNet) and ImageNet-10-2 (another subset of ImageNet). Each ImageNet-10 dataset is built by sampling a subset with 10 different labels from ImageNet; a sampling sketch follows the table below.
| Dataset | Training Size | Number of Classes | Descriptions |
| :-----: | :-----------: | :---------------: | :----------: |
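The ImageNet-10 subsets can be reproduced with a short script along the following lines. This is only an illustrative sketch, not the authors' actual code; it assumes an ImageNet-style layout with one folder per class, and the paths are hypothetical.

```python
import os
import random
import shutil

def make_imagenet_10(src_root, dst_root, seed):
    """Copy 10 randomly chosen class folders into a new dataset root."""
    random.seed(seed)  # e.g. different seeds for ImageNet-10-1 and ImageNet-10-2
    labels = random.sample(sorted(os.listdir(src_root)), 10)
    for label in labels:
        shutil.copytree(os.path.join(src_root, label), os.path.join(dst_root, label))

# Hypothetical paths for illustration
make_imagenet_10('imagenet/train', 'imagenet-10-1/train', seed=1)
```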
NNI provides state-of-the-art tuning algorithms as built-in tuners and makes them easy to use. Below is a brief summary of NNI's current built-in tuners:
Note: Click the **Tuner's name** for a detailed description of the algorithm, and click the corresponding **Usage** for the tuner's installation requirements, suggested scenario and usage example. Here is an [article](./Blog/HPOComparison.md) comparing different tuners on several problems.
### Could not get `default metric` in webUI of virtual machines
Configure the network in bridge mode, or another mode that makes the virtual machine accessible from external machines, and make sure the virtual machine's port is not blocked by a firewall.
### Windows local mode problems
Please refer to [NNI Windows local mode](WindowsLocalMode.md).
### Help us improve
Please search https://github.com/Microsoft/nni/issues to see whether your problem has already been reported by someone else, and create a new issue if no existing one covers it.
Run the **config_windows.yml** file from your command line (e.g. `nnictl create --config config_windows.yml`) to start the MNIST experiment.
**Note**: if you're using Windows local mode, you need to change `python3` to `python` in the config.yml file, or use the config_windows.yml file to start the experiment.
### Trial failed with missing DLL in cmd or PowerShell
This error is caused by missing LIBIFCOREMD.DLL and LIBMMD.DLL, which makes SciPy fail to install. Anaconda Python is highly recommended. If you use the official Python, make sure one of `Visual Studio`, `MATLAB`, `MKL` or `Intel Distribution for Python` is installed on Windows before running NNI; if not, install one of the products above or Anaconda Python (64-bit).
> ImportError: DLL load failed
### Trial failed on webUI
If there is a stderr file, please check it out. Two possible cases are as follows:
* forgetting to change the trial command `python3` to `python` in each experiment YAML file.
* forgetting to install experiment dependencies such as TensorFlow, Keras and so on.
### Fail to use BOHB on Windows
Make sure a C++ 14.0 compiler is installed, then run `nnictl package install --name=BOHB` to install the dependencies.
### Tuners not supported on Windows
SMAC is not supported currently; the specific reason can be found in this [GitHub issue](https://github.com/automl/SMAC3/issues/483).