Merge pull request #212 from microsoft/master

merge master

Merge pull request #212 from microsoft/master
merge master
49972952 · SparkSnail · GitHub · e259d109 · 025e0b46 · 49972952
Unverified Commit 49972952 authored Nov 03, 2019 by SparkSnail Committed by GitHub Nov 03, 2019
19 changed files
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -13,8 +13,8 @@ jobs:
  - script: |
      python3 -m pip install torch==0.4.1 --user
      python3 -m pip install torchvision==0.2.1 --user
-      python3 -m pip install tensorflow==1.12.0 --user
+      python3 -m pip install tensorflow==1.13.1 --user
-    displayName: 'Install dependencies for integration'
+    displayName: 'Install dependencies'
  - script: |
      source install.sh
    displayName: 'Install nni toolkit via source code'
@@ -59,7 +59,7 @@ jobs:
      python3 -m pip install torch==0.4.1 --user
      python3 -m pip install torchvision==0.2.1 --user
      python3 -m pip install tensorflow==1.13.1 --user
-    displayName: 'Install dependencies for integration'
+    displayName: 'Install dependencies'
  - script: |
      source install.sh
    displayName: 'Install nni toolkit via source code'
@@ -79,3 +79,43 @@ jobs:
      cd test
      PATH=$HOME/Library/Python/3.7/bin:$PATH python3 cli_test.py
    displayName: 'nnicli test'
+- job: 'basic_test_pr_Windows'
+  pool:
+    vmImage: 'vs2017-win2016'
+  strategy:
+    matrix:
+      Python36:
+        PYTHON_VERSION: '3.6'
+  steps:
+  - script: |
+      powershell.exe -file install.ps1
+    displayName: 'Install nni toolkit via source code'
+  - script: |
+      python -m pip install scikit-learn==0.20.0 --user
+      python -m pip install keras==2.1.6 --user
+      python -m pip install https://download.pytorch.org/whl/cu90/torch-0.4.1-cp36-cp36m-win_amd64.whl --user
+      python -m pip install torchvision --user
+      python -m pip install tensorflow==1.13.1 --user
+    displayName: 'Install dependencies'
+  - script: |
+      cd test
+      powershell.exe -file unittest.ps1
+    displayName: 'unit test'
+  - script: |
+      cd test
+      python naive_test.py
+    displayName: 'Naive test'
+  - script: |
+      cd test
+      python tuner_test.py
+    displayName: 'Built-in tuners / assessors tests'
+  - script: |
+      cd test
+      python metrics_test.py
+    displayName: 'Trial job metrics test'
+  - script: |
+      cd test
+      python cli_test.py
+    displayName: 'nnicli test'
--- a/docs/en_US/Makefile
+++ b/docs/en_US/Makefile
@@ -16,4 +16,4 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
--- a/docs/en_US/Tuner/BuiltinTuner.md
+++ b/docs/en_US/Tuner/BuiltinTuner.md
@@ -122,7 +122,7 @@ Its requirement of computation resource is relatively high. Specifically, it req
 * **optimize_mode** (*maximize or minimize, optional, default = maximize*) - If 'maximize', the tuner will target to maximize metrics. If 'minimize', the tuner will target to minimize metrics.
-* **population_size** (*int value(should >0), optional, default = 20*) - the initial size of the population(trial num) in evolution tuner.
+* **population_size** (*int value (should be > 0), optional, default = 20*) - the initial size of the population (trial num) in the evolution tuner. It is suggested that `population_size` be much larger than `concurrency`, so users can get the most out of the algorithm (and at least `concurrency`, or the tuner will fail on its first generation of parameters).
 **Usage example**

--- a/docs/en_US/sdk_reference.rst
+++ b/docs/en_US/sdk_reference.rst
@@ -41,10 +41,13 @@ Assessor
 ..  autoclass:: nni.assessor.Assessor
    :members:
-..  autoclass:: nni.curvefitting_assessor.curvefitting_assessor.CurvefittingAssessor
+..  autoclass:: nni.assessor.AssessResult
    :members:
-..  autoclass:: nni.medianstop_assessor.medianstop_assessor.MedianstopAssessor
+..  autoclass:: nni.curvefitting_assessor.CurvefittingAssessor
+    :members:
+..  autoclass:: nni.medianstop_assessor.MedianstopAssessor
    :members:
@@ -57,4 +60,4 @@ Advisor
    :members:
 ..  autoclass:: nni.bohb_advisor.bohb_advisor.BOHB
    :members:
\ No newline at end of file
--- a/pylintrc
+++ b/pylintrc
@@ -28,7 +28,7 @@ enable= unused-wildcard-import,
        line-too-long,
        unused-variable,
        wildcard-import,
-        useless-super-delegation,
+#        useless-super-delegation,
        len-as-condition,
        logging-format-interpolation,
        redefined-builtin,
@@ -42,7 +42,7 @@ enable= unused-wildcard-import,
 #        too-many-branches,
 #        protected-access 
-ignore-patterns=test.py
+ignore-patterns=test*
 # List of members which are set dynamically and missed by pylint inference
 generated-members=numpy.*,torch.*
--- a/src/nni_manager/training_service/remote_machine/remoteMachineData.ts
+++ b/src/nni_manager/training_service/remote_machine/remoteMachineData.ts
@@ -209,7 +209,8 @@ export class SSHClientManager {
        const connectConfig: ConnectConfig = {
            host: this.rmMeta.ip,
            port: this.rmMeta.port,
-            username: this.rmMeta.username };
+            username: this.rmMeta.username,
+            tryKeyboard: true };
        if (this.rmMeta.passwd !== undefined) {
            connectConfig.password = this.rmMeta.passwd;
        } else if (this.rmMeta.sshKeyPath !== undefined) {
@@ -231,6 +232,8 @@ export class SSHClientManager {
          .on('error', (err: Error) => {
            // SSH connection error, reject with error message
            deferred.reject(new Error(err.message));
+        }).on("keyboard-interactive", (name, instructions, lang, prompts, finish) => {
+            finish([this.rmMeta.passwd]);
        })
          .connect(connectConfig);

--- a/src/sdk/pynni/nni/assessor.py
+++ b/src/sdk/pynni/nni/assessor.py
@@ -18,44 +18,118 @@
 # OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 # ==================================================================================================
+"""
+Assessor analyzes trial's intermediate results (e.g., periodically evaluated accuracy on test dataset)
+to tell whether this trial can be early stopped or not.
+See :class:`Assessor`'s specification and ``docs/en_US/assessors.rst`` for details.
+"""
-import logging
 from enum import Enum
+import logging
 from .recoverable import Recoverable
+__all__ = ['AssessResult', 'Assessor']
 _logger = logging.getLogger(__name__)
 class AssessResult(Enum):
+    """
+    Enum class for :meth:`Assessor.assess_trial` return value.
+    """
    Good = True
+    """The trial works well."""
    Bad = False
+    """The trial works poorly and should be early stopped."""
 class Assessor(Recoverable):
+    """
+    Assessor analyzes trial's intermediate results (e.g., periodically evaluated accuracy on test dataset)
+    to tell whether this trial can be early stopped or not.
+    This is the abstract base class for all assessors.
+    Early stopping algorithms should derive this class and override :meth:`assess_trial` method,
+    which receives intermediate results from trials and gives an assessing result.
+    If :meth:`assess_trial` returns :obj:`AssessResult.Bad` for a trial,
+    it hints NNI framework that the trial is likely to result in a poor final accuracy,
+    and therefore should be killed to save resource.
+    If an assessor wants to get notified when a trial ends, it can also override :meth:`trial_end`.
+    To write a new assessor, you can reference :class:`~nni.medianstop_assessor.MedianstopAssessor`'s code as an example.
+    See Also
+    --------
+    Builtin assessors:
+    :class:`~nni.medianstop_assessor.MedianstopAssessor`
+    :class:`~nni.curvefitting_assessor.CurvefittingAssessor`
+    """
    def assess_trial(self, trial_job_id, trial_history):
-        """Determines whether a trial should be killed. Must override.
+        """
-        trial_job_id: identifier of the trial (str).
+        Abstract method for determining whether a trial should be killed. Must override.
-        trial_history: a list of intermediate result objects.
-        Returns AssessResult.Good or AssessResult.Bad.
+        The NNI framework has little guarantee on ``trial_history``.
+        This method is not guaranteed to be invoked each time ``trial_history`` gets updated.
+        It is also possible that a trial's history keeps updating after receiving a bad result.
+        And if the trial failed and retried, ``trial_history`` may be inconsistent with its previous value.
+        The only guarantee is that ``trial_history`` is always growing.
+        It will not be empty and will always be longer than previous value.
+        This is an example of how :meth:`assess_trial` gets invoked sequentially:
+        ::
+            trial_job_id | trial_history   | return value
+            ------------ | --------------- | ------------
+            Trial_A      | [1.0, 2.0]      | Good
+            Trial_B      | [1.5, 1.3]      | Bad
+            Trial_B      | [1.5, 1.3, 1.9] | Good
+            Trial_A      | [0.9, 1.8, 2.3] | Good
+        Parameters
+        ----------
+        trial_job_id: str
+            Unique identifier of the trial.
+        trial_history: list
+            Intermediate results of this trial. The element type is decided by trial code.
+        Returns
+        -------
+        AssessResult
+            :obj:`AssessResult.Good` or :obj:`AssessResult.Bad`.
        """
        raise NotImplementedError('Assessor: assess_trial not implemented')
    def trial_end(self, trial_job_id, success):
-        """Invoked when a trial is completed or terminated. Do nothing by default.
+        """
-        trial_job_id: identifier of the trial (str).
+        Abstract method invoked when a trial is completed or terminated. Do nothing by default.
-        success: True if the trial successfully completed; False if failed or terminated.
+        Parameters
+        ----------
+        trial_job_id: str
+            Unique identifier of the trial.
+        success: bool
+            True if the trial successfully completed; False if failed or terminated.
        """
    def load_checkpoint(self):
-        """Load the checkpoint of assessr.
+        """
-        path: checkpoint directory for assessor
+        Internal API under revision, not recommended for end users.
        """
        checkpoin_path = self.get_checkpoint_path()
        _logger.info('Load checkpoint ignored by assessor, checkpoint path: %s', checkpoin_path)
    def save_checkpoint(self):
-        """Save the checkpoint of assessor.
+        """
-        path: checkpoint directory for assessor
+        Internal API under revision, not recommended for end users.
        """
        checkpoin_path = self.get_checkpoint_path()
        _logger.info('Save checkpoint ignored by assessor, checkpoint path: %s', checkpoin_path)

--- a/src/sdk/pynni/nni/curvefitting_assessor/__init__.py
+++ b/src/sdk/pynni/nni/curvefitting_assessor/__init__.py
+from .curvefitting_assessor import CurvefittingAssessor
--- a/src/sdk/pynni/nni/medianstop_assessor/__init__.py
+++ b/src/sdk/pynni/nni/medianstop_assessor/__init__.py
+from .medianstop_assessor import MedianstopAssessor
--- a/src/sdk/pynni/nni/tuner.py
+++ b/src/sdk/pynni/nni/tuner.py
@@ -17,31 +17,128 @@
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
 # OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 # ==================================================================================================
+"""
+Tuner is an AutoML algorithm, which generates a new configuration for the next try.
+A new trial will run with this configuration.
+See :class:`Tuner`'s specification and ``docs/en_US/tuners.rst`` for details.
+"""
 import logging
 import nni
 from .recoverable import Recoverable
+__all__ = ['Tuner']
 _logger = logging.getLogger(__name__)
 class Tuner(Recoverable):
+    """
+    Tuner is an AutoML algorithm, which generates a new configuration for the next try.
+    A new trial will run with this configuration.
+    This is the abstract base class for all tuners.
+    Tuning algorithms should derive this class and override :meth:`update_search_space`, :meth:`receive_trial_result`,
+    as well as :meth:`generate_parameters` or :meth:`generate_multiple_parameters`.
+    After initializing, NNI will first call :meth:`update_search_space` to tell tuner the feasible region,
+    and then call :meth:`generate_parameters` one or more times to request for hyper-parameter configurations.
+    The framework will train several models with given configuration.
+    When one of them is finished, the final accuracy will be reported to :meth:`receive_trial_result`.
+    And then another configuration will be requested and trained, until the whole experiment finishes.
+    If a tuner wants to know when a trial ends, it can also override :meth:`trial_end`.
+    Tuners use *parameter ID* to track trials.
+    In tuner context, there is a one-to-one mapping between parameter ID and trial.
+    When the framework asks the tuner to generate hyper-parameters for a new trial,
+    an ID has already been assigned and can be recorded in :meth:`generate_parameters`.
+    Later when the trial ends, the ID will be reported to :meth:`trial_end`,
+    and :meth:`receive_trial_result` if it has a final result.
+    Parameter IDs are unique integers.
+    The type/format of search space and hyper-parameters are not limited,
+    as long as they are JSON-serializable and in sync with trial code.
+    For HPO tuners, however, there is a widely shared common interface,
+    which supports ``choice``, ``randint``, ``uniform``, and so on.
+    See ``docs/en_US/Tutorial/SearchSpaceSpec.md`` for details of this interface.
+    [WIP] For advanced tuners which take advantage of trials' intermediate results,
+    an ``Advisor`` interface is under development.
+    See Also
+    --------
+    Builtin tuners:
+    :class:`~nni.hyperopt_tuner.hyperopt_tuner.HyperoptTuner`
+    :class:`~nni.evolution_tuner.evolution_tuner.EvolutionTuner`
+    :class:`~nni.smac_tuner.smac_tuner.SMACTuner`
+    :class:`~nni.gridsearch_tuner.gridsearch_tuner.GridSearchTuner`
+    :class:`~nni.networkmorphism_tuner.networkmorphism_tuner.NetworkMorphismTuner`
+    :class:`~nni.metis_tuner.metis_tuner.MetisTuner`
+    """
    def generate_parameters(self, parameter_id, **kwargs):
-        """Returns a set of trial (hyper-)parameters, as a serializable object.
+        """
-        User code must override either this function or 'generate_multiple_parameters()'.
+        Abstract method which provides a set of hyper-parameters.
+        This method will get called when the framework is about to launch a new trial,
+        if user does not override :meth:`generate_multiple_parameters`.
+        The return value of this method will be received by trials via :func:`nni.get_next_parameter`.
+        It should fit in the search space, though the framework will not verify this.
+        User code must override either this method or :meth:`generate_multiple_parameters`.
+        Parameters
+        ----------
        parameter_id: int
+            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+        **kwargs:
+            Unstable parameters which should be ignored by normal users.
+        Returns
+        -------
+        any
+            The hyper-parameters, a dict in most cases, but could be any JSON-serializable type when needed.
+        Raises
+        ------
+        nni.NoMoreTrialError
+            If the search space is fully explored, tuner can raise this exception.
        """
+        # FIXME: some tuners raise NoMoreTrialError when they are waiting for more trial results
+        # we need to design a new exception for this purpose
        raise NotImplementedError('Tuner: generate_parameters not implemented')
    def generate_multiple_parameters(self, parameter_id_list, **kwargs):
-        """Returns multiple sets of trial (hyper-)parameters, as iterable of serializable objects.
+        """
-        Call 'generate_parameters()' by 'count' times by default.
+        Callback method which provides multiple sets of hyper-parameters.
-        User code must override either this function or 'generate_parameters()'.
-        If there's no more trial, user should raise nni.NoMoreTrialError exception in generate_parameters().
+        This method will get called when the framework is about to launch one or more new trials.
-        If so, this function will only return sets of trial (hyper-)parameters that have already been collected.
+        If user does not override this method, it will invoke :meth:`generate_parameters` on each parameter ID.
+        See :meth:`generate_parameters` for details.
+        User code must override either this method or :meth:`generate_parameters`.
+        Parameters
+        ----------
        parameter_id_list: list of int
+            Unique identifiers for each set of requested hyper-parameters.
+            These will later be used in :meth:`receive_trial_result`.
+        **kwargs:
+            Unstable parameters which should be ignored by normal users.
+        Returns
+        -------
+        list
+            List of hyper-parameters. An empty list indicates there are no more trials.
        """
        result = []
        for parameter_id in parameter_id_list:
@@ -54,56 +151,85 @@ class Tuner(Recoverable):
        return result
    def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
-        """Invoked when a trial reports its final result. Must override.
+        """
-        By default this only reports results of algorithm-generated hyper-parameters.
+        Abstract method invoked when a trial reports its final result. Must override.
-        Use `accept_customized_trials()` to receive results from user-added parameters.
+        This method only listens to results of algorithm-generated hyper-parameters.
+        Currently customized trials added from web UI will not report result to this method.
+        Parameters
+        ----------
        parameter_id: int
-        parameters: object created by 'generate_parameters()'
+            Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
-        value: object reported by trial
+        parameters
-        customized: bool, true if the trial is created from web UI, false if generated by algorithm
+            Hyper-parameters generated by :meth:`generate_parameters`.
-        trial_job_id: str, only available in multiphase mode.
+        value
+            Result from trial (the return value of :func:`nni.report_final_result`).
+        **kwargs:
+            Unstable parameters which should be ignored by normal users.
        """
        raise NotImplementedError('Tuner: receive_trial_result not implemented')
-    def accept_customized_trials(self, accept=True):
+    def _accept_customized_trials(self, accept=True):
-        """Enable or disable receiving results of user-added hyper-parameters.
+        # FIXME: because Tuner is designed as interface, this API should not be here
-        By default `receive_trial_result()` will only receive results of algorithm-generated hyper-parameters.
-        If tuners want to receive those of customized parameters as well, they can call this function in `__init__()`.
+        # Enable or disable receiving results of user-added hyper-parameters.
-        """
+        # By default `receive_trial_result()` will only receive results of algorithm-generated hyper-parameters.
+        # If tuners want to receive those of customized parameters as well, they can call this function in `__init__()`.
        # pylint: disable=attribute-defined-outside-init
-        # FIXME: because tuner is designed as interface, this API should not be here
        self._accept_customized = accept
    def trial_end(self, parameter_id, success, **kwargs):
-        """Invoked when a trial is completed or terminated. Do nothing by default.
+        """
+        Abstract method invoked when a trial is completed or terminated. Do nothing by default.
+        Parameters
+        ----------
        parameter_id: int
-        success: True if the trial successfully completed; False if failed or terminated
+            Unique identifier for hyper-parameters used by this trial.
+        success: bool
+            True if the trial successfully completed; False if failed or terminated.
+        **kwargs:
+            Unstable parameters which should be ignored by normal users.
        """
    def update_search_space(self, search_space):
-        """Update the search space of tuner. Must override.
+        """
-        search_space: JSON object
+        Abstract method for updating the search space. Must override.
+        Tuners are advised to support updating search space at run-time.
+        If a tuner can only set search space once before generating first hyper-parameters,
+        it should explicitly document this behaviour.
+        Parameters
+        ----------
+        search_space
+            JSON object defined by experiment owner.
        """
        raise NotImplementedError('Tuner: update_search_space not implemented')
    def load_checkpoint(self):
-        """Load the checkpoint of tuner.
+        """
-        path: checkpoint directory for tuner
+        Internal API under revision, not recommended for end users.
        """
        checkpoin_path = self.get_checkpoint_path()
        _logger.info('Load checkpoint ignored by tuner, checkpoint path: %s', checkpoin_path)
    def save_checkpoint(self):
-        """Save the checkpoint of tuner.
+        """
-        path: checkpoint directory for tuner
+        Internal API under revision, not recommended for end users.
        """
        checkpoin_path = self.get_checkpoint_path()
        _logger.info('Save checkpoint ignored by tuner, checkpoint path: %s', checkpoin_path)
    def import_data(self, data):
-        """Import additional data for tuning
-        data: a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'
        """
+        Internal API under revision, not recommended for end users.
+        """
+        # Import additional data for tuning
+        # data: a list of dictionaries, each of which has at least two keys, 'parameter' and 'value'
+        pass
    def _on_exit(self):
        pass

--- a/src/sdk/pynni/tests/test_tuner.py
+++ b/src/sdk/pynni/tests/test_tuner.py
@@ -34,7 +34,7 @@ class NaiveTuner(Tuner):
        self.param = 0
        self.trial_results = []
        self.search_space = None
-        self.accept_customized_trials()
+        self._accept_customized_trials()
    def generate_parameters(self, parameter_id, **kwargs):
        # report Tuner's internal states to generated parameters,

--- a/test/metrics_test.py
+++ b/test/metrics_test.py
@@ -18,18 +18,23 @@
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+import sys
+import os.path as osp
 import subprocess
 import time
 import traceback
 import json
 import requests
-from utils import get_experiment_status, get_yml_content, parse_max_duration_time, get_succeeded_trial_num
+from utils import get_experiment_status, get_yml_content, parse_max_duration_time, get_succeeded_trial_num, print_stderr
 from utils import GREEN, RED, CLEAR, STATUS_URL, TRIAL_JOBS_URL, METRICS_URL
 def run_test():
    '''run metrics test'''
-    config_file = 'metrics_test/metrics.test.yml'
+    if sys.platform == 'win32':
+        config_file = osp.join('metrics_test', 'metrics_win32.test.yml')
+    else:
+        config_file = osp.join('metrics_test', 'metrics.test.yml')
    print('Testing %s...' % config_file)
    proc = subprocess.run(['nnictl', 'create', '--config', config_file])
@@ -44,6 +49,7 @@ def run_test():
        #print('experiment status:', status)
        if status == 'DONE':
            num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
+            print_stderr(TRIAL_JOBS_URL)
            assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
            check_metrics()
            break
@@ -51,7 +57,7 @@ def run_test():
    assert status == 'DONE', 'Failed to finish in maxExecDuration'
 def check_metrics():
-    with open('metrics_test/expected_metrics.json', 'r') as f:
+    with open(osp.join('metrics_test', 'expected_metrics.json'), 'r') as f:
        expected_metrics = json.load(f)
    print(expected_metrics)
    metrics = requests.get(METRICS_URL).json()

--- a/test/metrics_test/metrics_win32.test.yml
+++ b/test/metrics_test/metrics_win32.test.yml
+authorName: nni
+experimentName: default_test
+maxExecDuration: 3m
+maxTrialNum: 1
+trialConcurrency: 1
+searchSpacePath: ./search_space.json
+tuner:
+  builtinTunerName: Random
+trial:
+  codeDir: .
+  command: python trial.py
+  gpuNum: 0
+useAnnotation: false
+multiPhase: false
+multiThread: false
+trainingServicePlatform: local
--- a/test/naive_test.py
+++ b/test/naive_test.py
@@ -18,6 +18,8 @@
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+import sys
+import os.path as osp
 import json
 import subprocess
 import sys
@@ -30,10 +32,14 @@ from utils import GREEN, RED, CLEAR, EXPERIMENT_URL
 def naive_test():
    '''run naive integration test'''
    to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt']
-    to_remove = list(map(lambda file: 'naive_test/' + file, to_remove))
+    to_remove = list(map(lambda file: osp.join('naive_test', file), to_remove))
    remove_files(to_remove)
-    proc = subprocess.run(['nnictl', 'create', '--config', 'naive_test/local.yml'])
+    if sys.platform == 'win32':
+        config_file = 'local_win32.yml'
+    else:
+        config_file = 'local.yml'
+    proc = subprocess.run(['nnictl', 'create', '--config', osp.join('naive_test' , config_file)])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode
    print('Spawning trials...')
@@ -44,8 +50,8 @@ def naive_test():
    for _ in range(120):
        time.sleep(1)
-        tuner_status = read_last_line('naive_test/tuner_result.txt')
+        tuner_status = read_last_line(osp.join('naive_test', 'tuner_result.txt'))
-        assessor_status = read_last_line('naive_test/assessor_result.txt')
+        assessor_status = read_last_line(osp.join('naive_test', 'assessor_result.txt'))
        experiment_status = is_experiment_done(nnimanager_log_path)
        assert tuner_status != 'ERROR', 'Tuner exited with error'
@@ -55,7 +61,7 @@ def naive_test():
            break
        if tuner_status is not None:
-            for line in open('naive_test/tuner_result.txt'):
+            for line in open(osp.join('naive_test', 'tuner_result.txt')):
                if line.strip() == 'ERROR':
                    break
                trial = int(line.split(' ')[0])
@@ -65,18 +71,20 @@ def naive_test():
    assert experiment_status, 'Failed to finish in 2 min'
-    ss1 = json.load(open('naive_test/search_space.json'))
+    ss1 = json.load(open(osp.join('naive_test', 'search_space.json')))
-    ss2 = json.load(open('naive_test/tuner_search_space.json'))
+    ss2 = json.load(open(osp.join('naive_test', 'tuner_search_space.json')))
    assert ss1 == ss2, 'Tuner got wrong search space'
-    tuner_result = set(open('naive_test/tuner_result.txt'))
+    tuner_result = set(open(osp.join('naive_test', 'tuner_result.txt')))
-    expected = set(open('naive_test/expected_tuner_result.txt'))
+    expected = set(open(osp.join('naive_test', 'expected_tuner_result.txt')))
    # Trials may complete before NNI gets assessor's result,
    # so it is possible to have more final result than expected
+    print('Tuner result:', tuner_result)
+    print('Expected tuner result:', expected)
    assert tuner_result.issuperset(expected), 'Bad tuner result'
-    assessor_result = set(open('naive_test/assessor_result.txt'))
+    assessor_result = set(open(osp.join('naive_test', 'assessor_result.txt')))
-    expected = set(open('naive_test/expected_assessor_result.txt'))
+    expected = set(open(osp.join('naive_test', 'expected_assessor_result.txt')))
    assert assessor_result == expected, 'Bad assessor result'
    subprocess.run(['nnictl', 'stop'])
@@ -85,10 +93,10 @@ def naive_test():
 def stop_experiment_test():
    '''Test `nnictl stop` command, including `nnictl stop exp_id` and `nnictl stop all`.
    Simple `nnictl stop` is not tested here since it is used in all other test code'''
-    subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml', '--port', '8080'], check=True)
+    subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8080'], check=True)
-    subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml', '--port', '8888'], check=True)
+    subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8888'], check=True)
-    subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml', '--port', '8989'], check=True)
+    subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8989'], check=True)
-    subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml', '--port', '8990'], check=True)
+    subprocess.run(['nnictl', 'create', '--config', osp.join('tuner_test', 'local.yml'), '--port', '8990'], check=True)
    # test cmd 'nnictl stop id`
    experiment_id = get_experiment_id(EXPERIMENT_URL)

--- a/test/naive_test/local_win32.yml
+++ b/test/naive_test/local_win32.yml
+authorName: nni
+experimentName: naive
+trialConcurrency: 3
+maxExecDuration: 1h
+maxTrialNum: 10
+#choice: local, remote
+trainingServicePlatform: local
+searchSpacePath: search_space.json
+#choice: true, false
+useAnnotation: false
+tuner:
+    codeDir: .
+    classFileName: naive_tuner.py
+    className: NaiveTuner
+    classArgs:
+        optimize_mode: maximize
+assessor:
+    codeDir: .
+    classFileName: naive_assessor.py
+    className: NaiveAssessor
+    classArgs:
+        optimize_mode: maximize
+trial:
+    command: python naive_trial.py
+    codeDir: .
+    gpuNum: 0
--- a/test/tuner_test.py
+++ b/test/tuner_test.py
@@ -18,6 +18,8 @@
 # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+import sys
+import os.path as osp
 import subprocess
 import sys
 import time
@@ -30,9 +32,16 @@ TUNER_LIST = ['GridSearch', 'BatchTuner', 'TPE', 'Random', 'Anneal', 'Evolution'
 ASSESSOR_LIST = ['Medianstop']
+def get_config_file_path():
+    if sys.platform == 'win32':
+        config_file = osp.join('tuner_test', 'local_win32.yml')
+    else:
+        config_file = osp.join('tuner_test', 'local.yml')
+    return config_file
 def switch(dispatch_type, dispatch_name):
    '''Change dispatch in config.yml'''
-    config_path = 'tuner_test/local.yml'
+    config_path = get_config_file_path()
    experiment_config = get_yml_content(config_path)
    if dispatch_name in ['GridSearch', 'BatchTuner', 'Random']:
        experiment_config[dispatch_type.lower()] = {
@@ -56,7 +65,7 @@ def test_builtin_dispatcher(dispatch_type, dispatch_name):
    switch(dispatch_type, dispatch_name)
    print('Testing %s...' % dispatch_name)
-    proc = subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml'])
+    proc = subprocess.run(['nnictl', 'create', '--config', get_config_file_path()])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode
    nnimanager_log_path = get_nni_log_path(EXPERIMENT_URL)

--- a/test/tuner_test/local_win32.yml
+++ b/test/tuner_test/local_win32.yml
+assessor:
+  builtinAssessorName: Medianstop
+  classArgs:
+    optimize_mode: maximize
+authorName: nni
+experimentName: test_sdk
+maxExecDuration: 1h
+maxTrialNum: 2
+searchSpacePath: search_space.json
+trainingServicePlatform: local
+trial:
+  codeDir: .
+  command: python naive_trial.py
+  gpuNum: 0
+trialConcurrency: 2
+tuner:
+  builtinTunerName: Evolution
+  classArgs:
+    optimize_mode: maximize
+useAnnotation: false
--- a/test/unittest.ps1
+++ b/test/unittest.ps1
 $CWD = $PWD
+$ErrorActionPreference = "Stop"
 # -------------For python unittest-------------
 ## ------Run annotation test------

--- a/test/utils.py
+++ b/test/utils.py
@@ -20,7 +20,6 @@
 import contextlib
 import collections
-import json
 import os
 import socket
 import sys
@@ -29,7 +28,7 @@ import requests
 import time
 import ruamel.yaml as yaml
-EXPERIMENT_DONE_SIGNAL = '"Experiment done"'
+EXPERIMENT_DONE_SIGNAL = 'Experiment done'
 GREEN = '\33[32m'
 RED = '\33[31m'
@@ -93,13 +92,11 @@ def get_nni_log_path(experiment_url):
 def is_experiment_done(nnimanager_log_path):
    '''check if the experiment is done successfully'''
    assert os.path.exists(nnimanager_log_path), 'Experiment starts failed'
-    if sys.platform == "win32":
-        cmds = ['type', nnimanager_log_path, '|', 'find', EXPERIMENT_DONE_SIGNAL]
+    with open(nnimanager_log_path, 'r') as f:
-    else:
+        log_content = f.read()
-        cmds = ['cat', nnimanager_log_path, '|', 'grep', EXPERIMENT_DONE_SIGNAL]
-    completed_process = subprocess.run(' '.join(cmds), shell=True)
+    return EXPERIMENT_DONE_SIGNAL in log_content
-    return completed_process.returncode == 0
 def get_experiment_status(status_url):
    nni_status = requests.get(status_url).json()
@@ -119,10 +116,12 @@ def print_stderr(trial_jobs_url):
    trial_jobs = requests.get(trial_jobs_url).json()
    for trial_job in trial_jobs:
        if trial_job['status'] == 'FAILED':
-            stderr_path = trial_job['stderrPath'].split(':')[-1]
            if sys.platform == "win32":
+                p = trial_job['stderrPath'].split(':')
+                stderr_path = ':'.join([p[-2], p[-1]])
                subprocess.run(['type', stderr_path], shell=True)
            else:
+                stderr_path = trial_job['stderrPath'].split(':')[-1]
                subprocess.run(['cat', stderr_path])
 def parse_max_duration_time(max_exec_duration):