Merge pull request #109 from Microsoft/master

merge master

Merge pull request #109 from Microsoft/master
merge master
4e5d8365 · SparkSnail · GitHub · e661c552 · 50697a2f · 4e5d8365
Unverified Commit 4e5d8365 authored Jan 03, 2019 by SparkSnail Committed by GitHub Jan 03, 2019
14 changed files
--- a/docs/ExperimentConfig.md
+++ b/docs/ExperimentConfig.md
@@ -5,6 +5,7 @@ The config file is written in yaml format, and need to be written correctly.
 This document describes the rule to write config file, and will provide some examples and templates. 
 ## Template
 * __light weight(without Annotation and Assessor)__ 
+
 ```
 authorName: 
 experimentName: 
@@ -34,7 +35,9 @@ machineList:
    username: 
    passwd: 
 ```
+
 * __Use Assessor__
+
 ```
 authorName: 
 experimentName: 
@@ -71,7 +74,9 @@ machineList:
    username: 
    passwd: 
 ```
+
 * __Use Annotation__
+
 ```
 authorName: 
 experimentName: 
@@ -107,6 +112,7 @@ machineList:
    username: 
    passwd: 
 ```
+
 ## Configuration
 * __authorName__
  * Description  
@@ -207,27 +213,27 @@ machineList:
 
  * Description
  
-    __assessor__ specifies the assessor algorithm to run an experiment, there are two kinds of ways to set assessor. One way is to use assessor provided by nni sdk, users need to set __builtinAssessorName__ and __classArgs__. Another way is to use users' own tuner file, and need to set __codeDirectory__, __classFileName__, __className__ and __classArgs__.
+    __assessor__ specifies the assessor algorithm to run an experiment. There are two ways to set an assessor. One way is to use an assessor provided by the nni sdk; users need to set __builtinAssessorName__ and __classArgs__. Another way is to use users' own assessor file, which requires setting __codeDir__, __classFileName__, __className__ and __classArgs__.
  * __builtinAssessorName__ and __classArgs__
    * __builtinAssessorName__
    
-	  __builtinAssessorName__ specifies the name of system assessor, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
+        __builtinAssessorName__ specifies the name of system assessor, nni sdk provides one kind of assessor {__Medianstop__}
    * __classArgs__

-	   __classArgs__ specifies the arguments of tuner algorithm
+        __classArgs__ specifies the arguments of assessor algorithm
  * __codeDir__, __classFileName__, __className__ and __classArgs__
    * __codeDir__
        
-		__codeDir__ specifies the directory of tuner code.
+         __codeDir__ specifies the directory of assessor code.
    * __classFileName__
 	   
-	  __classFileName__ specifies the name of tuner file.
+         __classFileName__ specifies the name of assessor file.
    * __className__
 	   
-	  __className__ specifies the name of tuner class.
+         __className__ specifies the name of assessor class.
    * __classArgs__
 	   
-	  __classArgs__ specifies the arguments of tuner algorithm.
+         __classArgs__ specifies the arguments of assessor algorithm.
  * __gpuNum__
    
      __gpuNum__ specifies the gpu number to run the assessor process. The value of this field should be a positive number.
@@ -306,7 +312,7 @@ machineList:
    
    * __image__
      
-      __iamge__ set the image to be used in __ps__.
+      __image__ sets the image to be used in __ps__.

  * __worker__
    
@@ -333,7 +339,7 @@ machineList:
    
    * __image__
      
-      __iamge__ set the image to be used in __worker__.
+      __image__ sets the image to be used in __worker__.



@@ -426,6 +432,7 @@ machineList:
 * __local mode__

  If users want to run trial jobs on the local machine and use annotation to generate the search space, they can use the following config:
+
 ```
 authorName: test
 experimentName: test_experiment
@@ -450,6 +457,7 @@ trial:
 ```

  An assessor configuration can be added to the config file if an assessor is used.
+
 ```
 authorName: test
 experimentName: test_experiment
@@ -482,6 +490,7 @@ trial:
 ```

  Or you could specify your own tuner and assessor file as following:
+
 ```
 authorName: test
 experimentName: test_experiment
@@ -518,6 +527,7 @@ trial:
 * __remote mode__

 If trial jobs are run on remote machines, users can specify the remote machine information in the following format:
+
 ```
 authorName: test
 experimentName: test_experiment
@@ -596,7 +606,6 @@ paiConfig:
  passWord: test
  #The host of restful server of pai
  host: 10.10.10.10
-
 ```

 * __kubeflow mode__
@@ -635,7 +644,9 @@ kubeflowConfig:
    server: 10.10.10.10
    path: /var/nfs/general
 ```
+
 kubeflow use azure storage
+
 ```
 authorName: default
 experimentName: example_mni

--- a/examples/tuners/enas_nni/README.md
+++ b/examples/tuners/enas_nni/README.md
+ **Run ENAS in NNI**	
+ ===	
+ 
+  Now we have an ENAS example, [enas-nni](https://github.com/countif/enas_nni), contributed by our community, that runs in NNI.
+ Thanks to our lovely contributors.
+ And we welcome more and more people to join us!
\ No newline at end of file
--- a/src/nni_manager/common/manager.ts
+++ b/src/nni_manager/common/manager.ts
@@ -85,7 +85,7 @@ interface TrialJobStatistics {
 }

 interface NNIManagerStatus {
-    status: 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL';
+    status: 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL';
    errors: string[];
 }


--- a/src/nni_manager/core/nnimanager.ts
+++ b/src/nni_manager/core/nnimanager.ts
@@ -425,14 +425,10 @@ class NNIManager implements Manager {
            throw new Error('Error: tuner has not been setup');
        }
        let allFinishedTrialJobNum: number = 0;
+        let waitSubmittedToFinish: number;
        while (this.status.status !== 'STOPPING' && this.status.status !== 'STOPPED') {
            const finishedTrialJobNum: number = await this.requestTrialJobsStatus();
-
            allFinishedTrialJobNum += finishedTrialJobNum;
-            if (allFinishedTrialJobNum >= this.experimentProfile.params.maxTrialNum) {
-                // write this log for travis CI
-                this.log.info('Experiment done.');
-            }

            // requestTrialNum is the number of trials that will be requested from tuner.
            // If trialConcurrency does not change, requestTrialNum equals finishedTrialJobNum.
@@ -467,21 +463,29 @@ class NNIManager implements Manager {
            // as still running. DONE could be transfered from RUNNING or NO_MORE_TRIAL.
            assert(this.status.status === 'RUNNING' ||
                this.status.status === 'DONE' ||
-                this.status.status === 'NO_MORE_TRIAL');
+                this.status.status === 'NO_MORE_TRIAL' ||
+                this.status.status === 'TUNER_NO_MORE_TRIAL');
            if (this.experimentProfile.execDuration > this.experimentProfile.params.maxExecDuration ||
                this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum) {
-                if (this.status.status === 'RUNNING' ||
-                    this.status.status === 'NO_MORE_TRIAL') {
+                if (this.status.status !== 'DONE') {
+                    this.status.status = 'NO_MORE_TRIAL';
+                    waitSubmittedToFinish = this.currSubmittedTrialNum;
+
+                    assert(allFinishedTrialJobNum <= waitSubmittedToFinish);
+                    if (allFinishedTrialJobNum >= waitSubmittedToFinish) {
+                        this.status.status = 'DONE';
                        this.experimentProfile.endTime = Date.now();
                        await this.storeExperimentProfile();
+                        // write this log for travis CI
+                        this.log.info('Experiment done.');
+                    }
                }
-                this.status.status = 'DONE';
            } else {
                if (this.status.status === 'DONE') {
                    delete this.experimentProfile.endTime;
                    await this.storeExperimentProfile();
                }
-                if (this.status.status !== 'NO_MORE_TRIAL') {
+                if (this.status.status !== 'TUNER_NO_MORE_TRIAL') {
                    this.status.status = 'RUNNING';
                }
                for (let i: number = this.trialJobs.size; i < this.experimentProfile.params.trialConcurrency; i++) {
@@ -602,7 +606,7 @@ class NNIManager implements Manager {
                this.requestTrialJobs(this.experimentProfile.params.trialConcurrency);
                break;
            case NEW_TRIAL_JOB:
-                if (this.status.status === 'NO_MORE_TRIAL') {
+                if (this.status.status === 'TUNER_NO_MORE_TRIAL') {
                    this.log.warning('It is not supposed to receive more trials after NO_MORE_TRIAL is set');
                    this.status.status = 'RUNNING';
                }
@@ -625,7 +629,7 @@ class NNIManager implements Manager {
                    'ADD_HYPERPARAMETER', tunerCommand.trial_job_id, content, undefined);
                break;
            case NO_MORE_TRIAL_JOBS:
-                this.status.status = 'NO_MORE_TRIAL';
+                this.status.status = 'TUNER_NO_MORE_TRIAL';
                break;
            case KILL_TRIAL_JOB:
                await this.trainingService.cancelTrialJob(JSON.parse(content), true);

--- a/src/nni_manager/core/test/import_all.test.ts
+++ b/src/nni_manager/core/test/import_all.test.ts
+import * as glob from 'glob';
+
+// Istanbul only generates reports for files that are used/imported; files that are not used/imported
+// by any test case are not included in code coverage reports.
+// This is a workaround that imports all files so that every source file shows up in code coverage reports.
+
+// Best-effort import of every non-test source file so that it appears in the coverage report.
+glob.sync('**/*.ts').forEach((file) => {
+    // Skip dependencies, type declarations, test files and the main.ts entry point.
+    // Every exclusion is an explicit "substring not found" (< 0) check; a bare indexOf() result
+    // is truthy for -1 and only falsy when the match happens to sit at index 0.
+    if (file.indexOf('node_modules/') < 0 && file.indexOf('types/') < 0
+        && file.indexOf('.test.ts') < 0 && file.indexOf('main.ts') < 0) {
+        // import() returns a promise, so a failed load surfaces as a rejection rather than a
+        // synchronous throw. Swallow it deliberately: this file only exists to widen coverage.
+        import('../../' + file).catch(() => undefined);
+    }
+});
--- a/src/nni_manager/package.json
+++ b/src/nni_manager/package.json
@@ -5,16 +5,19 @@
  "scripts": {
    "postbuild": "cp -rf scripts ./dist/ && cp -rf config ./dist/",
    "build": "tsc",
-    "test": "mocha -r ts-node/register -t 15000 --recursive **/*.test.ts --exclude node_modules/**/**/*.test.ts --exclude core/test/nnimanager.test.ts --colors",
+    "test": "nyc mocha -r ts-node/register -t 15000 --recursive **/*.test.ts --exclude node_modules/**/**/*.test.ts --exclude core/test/nnimanager.test.ts --colors",
    "start": "node dist/main.js",
    "tslint": "tslint -p ."
  },
  "license": "MIT",
  "dependencies": {
+    "azure-storage": "^2.10.2",
    "chai-as-promised": "^7.1.1",
    "child-process-promise": "^2.2.1",
    "express": "^4.16.3",
    "express-joi-validator": "^2.0.0",
+    "js-base64": "^2.4.9",
+    "kubernetes-client": "^6.5.0",
    "node-nvidia-smi": "^1.0.0",
    "rx": "^4.1.0",
    "sqlite3": "^4.0.2",
@@ -25,15 +28,13 @@
    "ts-deferred": "^1.0.4",
    "typescript-ioc": "^1.2.4",
    "typescript-string-operations": "^1.3.1",
-    "webhdfs": "^1.2.0",
-    "azure-storage": "^2.10.2",
-    "kubernetes-client": "^6.5.0",
-    "js-base64": "^2.4.9"
+    "webhdfs": "^1.2.0"
  },
  "devDependencies": {
    "@types/chai": "^4.1.4",
    "@types/chai-as-promised": "^7.1.0",
    "@types/express": "^4.16.0",
+    "@types/glob": "^7.1.1",
    "@types/mocha": "^5.2.5",
    "@types/node": "^10.12.18",
    "@types/request": "^2.47.1",
@@ -43,7 +44,9 @@
    "@types/stream-buffers": "^3.0.2",
    "@types/tmp": "^0.0.33",
    "chai": "^4.1.2",
+    "glob": "^7.1.3",
    "mocha": "^5.2.0",
+    "nyc": "^13.1.0",
    "request": "^2.87.0",
    "rmdir": "^1.2.0",
    "tmp": "^0.0.33",
@@ -54,5 +57,27 @@
  },
  "engines": {
    "node": ">=10.0.0"
+  },
+  "nyc": {
+    "include": [
+      "**/*.ts"
+    ],
+    "exclude": [
+      "**/test/*",
+      "./node_modules/"
+    ],
+    "extension": [
+      ".ts",
+      ".tsx"
+    ],
+    "require": [
+      "ts-node/register"
+    ],
+    "reporter": [
+      "text-summary",
+      "html"
+    ],
+    "sourceMap": true,
+    "instrument": true
  }
 }
--- a/src/nni_manager/yarn.lock
+++ b/src/nni_manager/yarn.lock
--- a/src/sdk/pynni/nni/__main__.py
+++ b/src/sdk/pynni/nni/__main__.py
@@ -34,6 +34,10 @@ from nni.multi_phase.multi_phase_dispatcher import MultiPhaseMsgDispatcher
 logger = logging.getLogger('nni.main')
 logger.debug('START')

+if os.environ.get('COVERAGE_PROCESS_START'):
+    import coverage
+    coverage.process_startup()
+
 def augment_classargs(input_class_args, classname):
    if classname in ClassArgs:
        for key, value in ClassArgs[classname].items():

--- a/src/sdk/pynni/nni/msg_dispatcher_base.py
+++ b/src/sdk/pynni/nni/msg_dispatcher_base.py
@@ -46,7 +46,7 @@ class MsgDispatcherBase(Recoverable):
        while True:
            _logger.debug('waiting receive_message')
            command, data = receive()
-            if command is None:
+            if command is None or command is CommandType.Terminate:
                break
            if multi_thread_enabled():
                self.pool.map_async(self.handle_request, [(command, data)])
@@ -64,11 +64,6 @@ class MsgDispatcherBase(Recoverable):

        _logger.debug('handle request: command: [{}], data: [{}]'.format(command, data))

-        if command is CommandType.Terminate:
-            # if receive Terminate command, exit process
-            _logger.info('Receive Terminate command from NNI manager, terminating')
-            exit(0)
-
        data = json_tricks.loads(data)

        command_handlers = {

--- a/src/sdk/pynni/setup.py
+++ b/src/sdk/pynni/setup.py
@@ -35,7 +35,8 @@ setuptools.setup(
        'hyperopt',
        'json_tricks',
        'numpy',
-        'scipy'
+        'scipy',
+        'coverage'
    ],
    package_data = {'nni': ['**/requirements.txt']},


--- a/src/sdk/pynni/ut.sh
+++ b/src/sdk/pynni/ut.sh
+#!/bin/bash
+coverage run setup.py test
+coverage html
--- a/test/.coveragerc
+++ b/test/.coveragerc
+# .coveragerc to control coverage.py
+[run]
+branch = True
+parallel = True
+data_file = ${COVERAGE_DATA_FILE}
+source = nni, nni_cmd, nni_trial_tool
+
+concurrency = multiprocessing
+
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+
+    # Don't complain about missing debug-only code:
+    def __repr__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:
+
+ignore_errors = True
+
+[html]
+directory = ${COVERAGE_HTML_DIR}
--- a/test/it.sh
+++ b/test/it.sh
+#!/bin/bash
+CWD=${PWD}
+
+## Export certain environment variables for unittest code to work
+export COVERAGE_PROCESS_START=${CWD}/.coveragerc
+export COVERAGE_DATA_FILE=${CWD}/coverage/data
+export COVERAGE_HTML_DIR=${CWD}/coverhtml
+
+rm ${COVERAGE_DATA_FILE}*
+rm -rf ${COVERAGE_HTML_DIR}/*
+mkdir ${CWD}/coverage
+mkdir ${COVERAGE_HTML_DIR}
+
+## ------Run integration test------
+echo "===========================Testing: integration test==========================="
+coverage run sdk_test.py
+coverage combine
+coverage html
--- a/tools/nni_cmd/nnictl.py
+++ b/tools/nni_cmd/nnictl.py
@@ -28,6 +28,10 @@ from .package_management import *
 from .constants import *
 from .tensorboard_utils import *

+if os.environ.get('COVERAGE_PROCESS_START'):
+    import coverage
+    coverage.process_startup()
+
 def nni_info(*args):
    if args[0].version:
        print(pkg_resources.get_distribution('nni').version)