Commit cfda0dae authored by demianzhang, committed by SparkSnail

NNI on Windows for NNI Local mode (#937)

parent 88ceed71
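
Editor's note: this change routes most shell interaction through new platform-dispatch helpers imported from `../common/util` (execMkdir, getScriptName, execScript, execKill, and friends) whose implementations are not part of this diff. Below is a minimal sketch of how such helpers could look, assuming Node's child_process / child-process-promise as used elsewhere in the commit; the names mirror the imports, but the bodies are illustrative only.

// Sketch only: hypothetical implementations of the platform-dispatch helpers
// imported from '../common/util'; the real ones are not shown in this diff.
import * as cp from 'child_process';
import * as cpp from 'child-process-promise';

// Choose a script suffix per platform: PowerShell on Windows, bash elsewhere.
export function getScriptName(fileNamePrefix: string): string {
    return process.platform === 'win32' ? `${fileNamePrefix}.ps1` : `${fileNamePrefix}.sh`;
}

// Create a directory with the platform's native command.
export async function execMkdir(directory: string): Promise<void> {
    if (process.platform === 'win32') {
        await cpp.exec(`powershell.exe New-Item -Path "${directory}" -ItemType "directory" -Force`);
    } else {
        await cpp.exec(`mkdir -p ${directory}`);
    }
}

// Launch a generated script with the matching interpreter and hand back the child process.
export function execScript(filePath: string): cp.ChildProcess {
    if (process.platform === 'win32') {
        return cp.exec(`powershell.exe -file "${filePath}"`);
    }
    return cp.exec(`bash ${filePath}`);
}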
......@@ -25,9 +25,10 @@ import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { String } from 'typescript-string-operations';
import { execMkdir, getScriptName, getgpuMetricsCollectorScriptContent, execScript, execTail, execRemove, execKill } from '../common/util'
import { getLogger, Logger } from '../../common/log';
import { delay } from '../../common/utils';
import { GPU_INFO_COLLECTOR_FORMAT, GPUInfo, GPUSummary } from '../common/gpuData';
import { GPUInfo, GPUSummary } from '../common/gpuData';
/**
* GPUScheduler for local training service
......@@ -57,6 +58,19 @@ class GPUScheduler {
}
}
/**
* Generate the gpu metrics collector script on the local machine and run it;
* the generated script folder is cleaned up when the scheduler is stopped.
*/
private async runGpuMetricsCollectorScript(): Promise<void> {
await execMkdir(this.gpuMetricCollectorScriptFolder);
//generate gpu_metrics_collector script
let gpuMetricsCollectorScriptPath: string = path.join(this.gpuMetricCollectorScriptFolder, getScriptName('gpu_metrics_collector'));
const gpuMetricsCollectorScriptContent: string = getgpuMetricsCollectorScriptContent(this.gpuMetricCollectorScriptFolder);
await fs.promises.writeFile(gpuMetricsCollectorScriptPath, gpuMetricsCollectorScriptContent, { encoding: 'utf8' });
execScript(gpuMetricsCollectorScriptPath)
}
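
Editor's note: on the local side the collector script body now comes from `getgpuMetricsCollectorScriptContent`, also imported from `../common/util` and not shown in this diff. A hedged sketch of how it might branch per platform follows; only the output files (`gpu_metrics`, `pid`) and the `nni_gpu_tool.gpu_metrics_collector` module name are taken from the surrounding code, the script bodies themselves are assumptions.

// Illustrative only: build the collector script content for the scheduler's script folder.
import * as path from 'path';

export function getgpuMetricsCollectorScriptContent(scriptFolder: string): string {
    if (process.platform === 'win32') {
        // PowerShell flavour: start the collector module, redirect metrics, record its pid.
        return [
            `$process = Start-Process python -ArgumentList "-m nni_gpu_tool.gpu_metrics_collector" -PassThru -NoNewWindow ` +
            `-RedirectStandardOutput ${path.join(scriptFolder, 'gpu_metrics')}`,
            `Write $process.Id | Out-File ${path.join(scriptFolder, 'pid')} -NoNewline`
        ].join('\r\n');
    }
    // bash flavour: background the collector and write its pid next to the metrics file.
    return [
        '#!/bin/bash',
        `python3 -m nni_gpu_tool.gpu_metrics_collector > ${path.join(scriptFolder, 'gpu_metrics')} 2>/dev/null &`,
        `echo $! > ${path.join(scriptFolder, 'pid')}`
    ].join('\n');
}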
public getAvailableGPUIndices(): number[] {
if (this.gpuSummary !== undefined) {
return this.gpuSummary.gpuInfos.filter((info: GPUInfo) => info.activeProcessNum === 0)
......@@ -78,33 +92,16 @@ class GPUScheduler {
this.stopping = true;
try {
const pid: string = await fs.promises.readFile(path.join(this.gpuMetricCollectorScriptFolder, 'pid'), 'utf8');
await cpp.exec(`pkill -P ${pid}`);
await cpp.exec(`rm -rf ${this.gpuMetricCollectorScriptFolder}`);
await execKill(pid);
await execRemove(this.gpuMetricCollectorScriptFolder);
} catch (error) {
this.log.error(`GPU scheduler error: ${error}`);
}
}
/**
* Generate gpu metric collector shell script in local machine,
* used to run in remote machine, and will be deleted after uploaded from local.
*/
private async runGpuMetricsCollectorScript(): Promise<void> {
await cpp.exec(`mkdir -p ${this.gpuMetricCollectorScriptFolder}`);
//generate gpu_metrics_collector.sh
const gpuMetricsCollectorScriptPath: string = path.join(this.gpuMetricCollectorScriptFolder, 'gpu_metrics_collector.sh');
const gpuMetricsCollectorScriptContent: string = String.Format(
GPU_INFO_COLLECTOR_FORMAT,
this.gpuMetricCollectorScriptFolder,
path.join(this.gpuMetricCollectorScriptFolder, 'pid')
);
await fs.promises.writeFile(gpuMetricsCollectorScriptPath, gpuMetricsCollectorScriptContent, { encoding: 'utf8' });
cp.exec(`bash ${gpuMetricsCollectorScriptPath}`);
}
private async updateGPUSummary(): Promise<void> {
const cmdresult: cpp.childProcessPromise.Result =
await cpp.exec(`tail -n 1 ${path.join(this.gpuMetricCollectorScriptFolder, 'gpu_metrics')}`);
await execTail(path.join(this.gpuMetricCollectorScriptFolder, 'gpu_metrics'));
if (cmdresult && cmdresult.stdout) {
this.gpuSummary = <GPUSummary>JSON.parse(cmdresult.stdout);
} else {
......
......@@ -18,7 +18,6 @@
*/
'use strict';
import * as cpp from 'child-process-promise';
import * as cp from 'child_process';
import { EventEmitter } from 'events';
......@@ -32,7 +31,8 @@ import {
HostJobApplicationForm, HyperParameters, JobApplicationForm, TrainingService, TrialJobApplicationForm,
TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../../common/trainingService';
import { delay, generateParamFileName, getExperimentRootDir, getJobCancelStatus, uniqueString } from '../../common/utils';
import { delay, generateParamFileName, getExperimentRootDir, getJobCancelStatus, uniqueString, isAlive, getNewLine } from '../../common/utils';
import { execMkdir, getScriptName, execScript, setEnvironmentVariable, execNewFile } from '../common/util'
import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { GPUScheduler } from './gpuScheduler';
......@@ -169,14 +169,7 @@ class LocalTrainingService implements TrainingService {
return this.getHostJob(trialJobId);
}
if (trialJob.status === 'RUNNING') {
let alive: boolean = false;
try {
await cpp.exec(`kill -0 ${trialJob.pid}`);
alive = true;
} catch (error) {
//ignore
}
let alive: boolean = await isAlive(trialJob.pid);
if (!alive) {
trialJob.endTime = Date.now();
this.setTrialJobStatus(trialJob, 'FAILED');
......@@ -284,7 +277,9 @@ class LocalTrainingService implements TrainingService {
public async setClusterMetadata(key: string, value: string): Promise<void> {
if (!this.initialized) {
this.rootDir = getExperimentRootDir();
await cpp.exec(`mkdir -p ${this.rootDir}`);
if(!fs.existsSync(this.rootDir)){
await cpp.exec(`powershell.exe mkdir ${this.rootDir}`);
}
this.initialized = true;
}
switch (key) {
......@@ -381,7 +376,7 @@ class LocalTrainingService implements TrainingService {
envVariables.push({
key: 'CUDA_VISIBLE_DEVICES',
value: this.gpuScheduler === undefined ? '' : resource.gpuIndices.join(',')
value: this.gpuScheduler === undefined ? '-1' : resource.gpuIndices.join(',')
});
return envVariables;
......@@ -465,36 +460,52 @@ class LocalTrainingService implements TrainingService {
}
}
private getScript(localTrailConfig: TrialConfig, workingDirectory: string): string[] {
    const script: string[] = [];
    if (process.platform === "win32") {
        // Windows: run the command through cmd, then let PowerShell write
        // "<exit code> <epoch ms>" into .nni/state (seconds since 1970 with "000" appended).
        script.push(
            `cmd /c ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
            `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
            `$NOW_DATE = "$NOW_DATE" + "000"`,
            `Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`);
    } else {
        // Linux/macOS: eval the command and append "<exit code> <epoch ms>" to .nni/state.
        script.push(
            `eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
            `echo $? \`date +%s000\` >${path.join(workingDirectory, '.nni', 'state')}`);
    }

    return script;
}
private async runTrialJob(trialJobId: string, resource: {gpuIndices: number[]}): Promise<void> {
const trialJobDetail: LocalTrialJobDetail = <LocalTrialJobDetail>this.jobMap.get(trialJobId);
const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource);
const runScriptLines: string[] = [];
if (!this.localTrailConfig) {
throw new Error('trial config is not initialized');
}
runScriptLines.push(
'#!/bin/bash',
`cd ${this.localTrailConfig.codeDir}`);
const runScriptLines: string[] = [];
if (process.platform !== "win32"){
runScriptLines.push('#!/bin/bash');
}
runScriptLines.push(`cd ${this.localTrailConfig.codeDir}`);
for (const variable of variables) {
runScriptLines.push(`export ${variable.key}=${variable.value}`);
runScriptLines.push(setEnvironmentVariable(variable));
}
runScriptLines.push(
`eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, 'stderr')}`,
`echo $? \`date +%s000\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`);
await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`);
await cpp.exec(`mkdir -p ${path.join(trialJobDetail.workingDirectory, '.nni')}`);
await cpp.exec(`touch ${path.join(trialJobDetail.workingDirectory, '.nni', 'metrics')}`);
await fs.promises.writeFile(
path.join(trialJobDetail.workingDirectory, 'run.sh'), runScriptLines.join('\n'), { encoding: 'utf8', mode: 0o777 });
const scripts: string[] = this.getScript(this.localTrailConfig, trialJobDetail.workingDirectory);
scripts.forEach(script => {
runScriptLines.push(script);
});
await execMkdir(trialJobDetail.workingDirectory);
await execMkdir(path.join(trialJobDetail.workingDirectory, '.nni'));
await execNewFile(path.join(trialJobDetail.workingDirectory, '.nni', 'metrics'));
const scriptName: string = getScriptName('run');
await fs.promises.writeFile(path.join(trialJobDetail.workingDirectory, scriptName), runScriptLines.join(getNewLine()), { encoding: 'utf8', mode: 0o777 });
await this.writeParameterFile(trialJobDetail.workingDirectory, (<TrialJobApplicationForm>trialJobDetail.form).hyperParameters);
const process: cp.ChildProcess = cp.exec(`bash ${path.join(trialJobDetail.workingDirectory, 'run.sh')}`);
const trialJobProcess: cp.ChildProcess = execScript(path.join(trialJobDetail.workingDirectory, scriptName));
this.setTrialJobStatus(trialJobDetail, 'RUNNING');
trialJobDetail.startTime = Date.now();
trialJobDetail.pid = process.pid;
trialJobDetail.pid = trialJobProcess.pid;
this.setExtraProperties(trialJobDetail, resource);
let buffer: Buffer = Buffer.alloc(0);
......
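
Editor's note: the inline `kill -0` probe and the hard-coded `\n` separator are replaced by `isAlive` and `getNewLine` from `../../common/utils`, whose implementations are outside this diff. A minimal sketch under the same platform-split assumption (bodies are illustrative, not the library's actual code):

import * as cpp from 'child-process-promise';

// Sketch only: report whether a pid still refers to a running process.
export async function isAlive(pid: number): Promise<boolean> {
    try {
        if (process.platform === 'win32') {
            // tasklist exits 0 even with no match, so inspect its output instead.
            const result: cpp.childProcessPromise.Result = await cpp.exec(`tasklist /FI "PID eq ${pid}"`);
            return result.stdout !== undefined && result.stdout.includes(String(pid));
        }
        await cpp.exec(`kill -0 ${pid}`);  // throws if the process no longer exists
        return true;
    } catch (error) {
        return false;
    }
}

// Line separator for generated scripts: CRLF for PowerShell, LF for bash.
export function getNewLine(): string {
    return process.platform === 'win32' ? '\r\n' : '\n';
}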
......@@ -46,7 +46,7 @@ import {
RemoteMachineScheduleInfo, RemoteMachineScheduleResult, SSHClient, SSHClientManager,
RemoteMachineTrialJobDetail, ScheduleResultType, REMOTEMACHINE_TRIAL_COMMAND_FORMAT
} from './remoteMachineData';
import { GPU_INFO_COLLECTOR_FORMAT } from '../common/gpuData';
import { GPU_INFO_COLLECTOR_FORMAT_LINUX } from '../common/gpuData';
import { SSHClientUtility } from './sshClientUtility';
import { validateCodeDir } from '../common/util';
import { RemoteMachineJobRestServer } from './remoteMachineJobRestServer';
......@@ -452,7 +452,7 @@ class RemoteMachineTrainingService implements TrainingService {
let gpuMetricsCollectorScriptPath: string = path.join(gpuMetricCollectorScriptFolder, userName, 'gpu_metrics_collector.sh');
const remoteGPUScriptsDir: string = this.getRemoteScriptsPath(userName); // This directory is used to store gpu_metrics and pid created by script
const gpuMetricsCollectorScriptContent: string = String.Format(
GPU_INFO_COLLECTOR_FORMAT,
GPU_INFO_COLLECTOR_FORMAT_LINUX,
remoteGPUScriptsDir,
path.join(remoteGPUScriptsDir, 'pid'),
);
......
......@@ -31,7 +31,7 @@ import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { LocalTrainingService } from '../local/localTrainingService';
// TODO: copy mockedTrail.py to local folder
const localCodeDir: string = tmp.dirSync().name
const localCodeDir: string = tmp.dirSync().name.split('\\').join('\\\\');
const mockedTrialPath: string = './training_service/test/mockedTrial.py'
fs.copyFileSync(mockedTrialPath, localCodeDir + '/mockedTrial.py')
......
......@@ -33,7 +33,8 @@ log_level_map = {
'debug': logging.DEBUG
}
_time_format = '%m/%d/%Y, %I:%M:%S %P'
_time_format = '%m/%d/%Y, %I:%M:%S %p'
class _LoggerFileWrapper(TextIOBase):
def __init__(self, logger_file):
self.file = logger_file
......
......@@ -19,6 +19,7 @@
# ==================================================================================================
import os
import sys
import json
import time
import subprocess
......@@ -87,6 +88,10 @@ def send_metric(string):
assert len(data) < 1000000, 'Metric too long'
_metric_file.write(b'ME%06d%b' % (len(data), data))
_metric_file.flush()
if sys.platform == "win32":
    file = open(_metric_file.name)
    file.close()
else:
    subprocess.run(['touch', _metric_file.name], check=True)
def get_sequence_id():
......
......@@ -18,6 +18,8 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import sys
import glob
import argparse
from utils import get_yml_content, dump_yml_content
......@@ -69,6 +71,19 @@ def update_training_service_config(args):
dump_yml_content(TRAINING_SERVICE_FILE, config)
def convert_command():
    '''convert command by platform'''
    if sys.platform != 'win32':
        return None
    config_files = glob.glob('./**/*.yml') + glob.glob('./**/**/*.yml')
    for config_file in config_files:
        print('processing {}'.format(config_file))
        yml_content = get_yml_content(config_file)
        if yml_content.get('trial'):
            if yml_content['trial'].get('command'):
                yml_content['trial']['command'] = yml_content['trial']['command'].replace('python3', 'python')
                dump_yml_content(config_file, yml_content)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote'], default='pai')
......@@ -96,3 +111,4 @@ if __name__ == '__main__':
args = parser.parse_args()
update_training_service_config(args)
convert_command()
jobs:
- job: 'Test'
  steps:
  - script: |
      powershell.exe -file install.ps1
    displayName: 'Install nni toolkit via source code'
  - script: |
      python -m pip install scikit-learn==0.20.0 --user
      python -m pip install keras==2.1.6 --user
      python -m pip install https://download.pytorch.org/whl/cu90/torch-0.4.1-cp36-cp36m-win_amd64.whl --user
      python -m pip install torchvision --user
      python -m pip install tensorflow-gpu==1.11.0 --user
    displayName: 'Install dependencies for integration tests'
  - script: |
      cd test
      python generate_ts_config.py
    displayName: 'generate config files'
  - script: |
      cd test
      python config_test.py --ts local --local_gpu --exclude smac,bohb
    displayName: 'Examples and advanced features tests on local machine'
  - script: |
      cd test
      powershell.exe -file unittest.ps1
    displayName: 'unit test'
  - script: |
      cd test
      python naive_test.py
    displayName: 'Naive test'
  - script: |
      cd test
      python tuner_test.py
    displayName: 'Built-in tuners / assessors tests'
  - script: |
      cd test
      python metrics_test.py
    displayName: 'Trial job metrics test'
$CWD = $PWD
# -------------For python unittest-------------
## ------Run annotation test------
echo ""
echo "===========================Testing: nni_annotation==========================="
cd $CWD/../tools/
python -m unittest -v nni_annotation/test_annotation.py
## Export certain environment variables for unittest code to work
$env:NNI_TRIAL_JOB_ID="test_trial_job_id"
$env:NNI_PLATFORM="unittest"
## ------Run sdk test------
echo ""
echo "===========================Testing: nni_sdk==========================="
cd $CWD/../src/sdk/pynni/
python -m unittest discover -v tests
# -------------For typescript unittest-------------
cd $CWD/../src/nni_manager
echo ""
echo "===========================Testing: nni_manager==========================="
npm run test
......@@ -22,6 +22,7 @@ import contextlib
import collections
import json
import os
import sys
import subprocess
import requests
import ruamel.yaml as yaml
......@@ -65,7 +66,7 @@ def dump_yml_content(file_path, content):
def setup_experiment(installed=True):
'''setup the experiment if nni is not installed'''
if not installed:
os.environ['PATH'] = os.environ['PATH'] + ':' + os.environ['PWD']
os.environ['PATH'] = os.environ['PATH'] + ':' + os.getcwd()
sdk_path = os.path.abspath('../src/sdk/pynni')
cmd_path = os.path.abspath('../tools')
pypath = os.environ.get('PYTHONPATH')
......@@ -79,7 +80,7 @@ def fetch_nni_log_path(experiment_url):
'''get nni's log path from nni's experiment url'''
experiment_profile = requests.get(experiment_url)
experiment_id = json.loads(experiment_profile.text)['id']
experiment_path = os.path.join(os.environ['HOME'], 'nni/experiments', experiment_id)
experiment_path = os.path.join(os.path.expanduser('~'), 'nni', 'experiments', experiment_id)
nnimanager_log_path = os.path.join(experiment_path, 'log', 'nnimanager.log')
return nnimanager_log_path
......@@ -87,6 +88,9 @@ def fetch_nni_log_path(experiment_url):
def is_experiment_done(nnimanager_log_path):
'''check if the experiment is done successfully'''
assert os.path.exists(nnimanager_log_path), 'Experiment starts failed'
if sys.platform == "win32":
    cmds = ['type', nnimanager_log_path, '|', 'find', EXPERIMENT_DONE_SIGNAL]
else:
    cmds = ['cat', nnimanager_log_path, '|', 'grep', EXPERIMENT_DONE_SIGNAL]
completed_process = subprocess.run(' '.join(cmds), shell=True)
......@@ -112,6 +116,9 @@ def print_stderr(trial_jobs_url):
for trial_job in trial_jobs:
if trial_job['status'] == 'FAILED':
stderr_path = trial_job['stderrPath'].split(':')[-1]
if sys.platform == "win32":
    subprocess.run(['type', stderr_path], shell=True)
else:
    subprocess.run(['cat', stderr_path])
def parse_max_duration_time(max_exec_duration):
......
......@@ -20,6 +20,7 @@
import os
import sys
import shutil
from . import code_generator
......@@ -28,6 +29,9 @@ from . import search_space_generator
__all__ = ['generate_search_space', 'expand_annotations']
slash = '/'
if sys.platform == "win32":
    slash = '\\'
def generate_search_space(code_dir):
"""Generate search space from Python source code.
......@@ -36,7 +40,7 @@ def generate_search_space(code_dir):
"""
search_space = {}
if code_dir.endswith('/'):
if code_dir.endswith(slash):
code_dir = code_dir[:-1]
for subdir, _, files in os.walk(code_dir):
......@@ -44,9 +48,9 @@ def generate_search_space(code_dir):
if subdir == code_dir:
package = ''
else:
assert subdir.startswith(code_dir + '/'), subdir
assert subdir.startswith(code_dir + slash), subdir
prefix_len = len(code_dir) + 1
package = subdir[prefix_len:].replace('/', '.') + '.'
package = subdir[prefix_len:].replace(slash, '.') + '.'
for file_name in files:
if file_name.endswith('.py'):
......@@ -76,9 +80,10 @@ def expand_annotations(src_dir, dst_dir):
src_dir: directory path of user code (str)
dst_dir: directory to place generated files (str)
"""
if src_dir[-1] == '/':
if src_dir[-1] == slash:
src_dir = src_dir[:-1]
if dst_dir[-1] == '/':
if dst_dir[-1] == slash:
dst_dir = dst_dir[:-1]
annotated = False
......
from subprocess import call, check_output
import sys
import os
import signal
import psutil
from .common_utils import print_error, print_normal, print_warning
def check_output_command(file_path, head=None, tail=None):
    '''call check_output command to read content from a file'''
    if os.path.exists(file_path):
        if sys.platform == 'win32':
            cmds = ['powershell.exe', 'type', file_path]
            if head:
                cmds += ['|', 'select', '-first', str(head)]
            elif tail:
                cmds += ['|', 'select', '-last', str(tail)]
            return check_output(cmds, shell=True).decode('utf-8')
        else:
            cmds = ['cat', file_path]
            if head:
                cmds = ['head', '-' + str(head), file_path]
            elif tail:
                cmds = ['tail', '-' + str(tail), file_path]
            return check_output(cmds, shell=False).decode('utf-8')
    else:
        print_error('{0} does not exist!'.format(file_path))
        exit(1)

def kill_command(pid):
    '''kill command'''
    if sys.platform == 'win32':
        process = psutil.Process(pid=pid)
        process.send_signal(signal.CTRL_BREAK_EVENT)
    else:
        cmds = ['kill', str(pid)]
        call(cmds)

def install_package_command(package_name):
    '''install python package from pip'''
    #TODO refactor python logic
    if sys.platform == "win32":
        cmds = 'python -m pip install --user {0}'.format(package_name)
    else:
        cmds = 'python3 -m pip install --user {0}'.format(package_name)
    call(cmds, shell=True)

def install_requirements_command(requirements_path):
    '''install requirements.txt'''
    cmds = 'cd ' + requirements_path + ' && {0} -m pip install --user -r requirements.txt'
    #TODO refactor python logic
    if sys.platform == "win32":
        cmds = cmds.format('python')
    else:
        cmds = cmds.format('python3')
    call(cmds, shell=True)
......@@ -18,10 +18,13 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import sys
import json
import ruamel.yaml as yaml
import psutil
import socket
from pathlib import Path
from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT
def get_yml_content(file_path):
......@@ -71,3 +74,15 @@ def detect_port(port):
return True
except:
return False
def get_user():
    if sys.platform == 'win32':
        return os.environ['USERNAME']
    else:
        return os.environ['USER']

def get_python_dir(sitepackages_path):
    if sys.platform == "win32":
        return str(Path(sitepackages_path))
    else:
        return str(Path(sitepackages_path).parents[2])
\ No newline at end of file
......@@ -19,8 +19,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
from colorama import Fore
NNICTL_HOME_DIR = os.path.join(os.environ['HOME'], '.local', 'nnictl')
NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl')
ERROR_INFO = 'ERROR: %s'
......@@ -32,7 +33,7 @@ DEFAULT_REST_PORT = 8080
REST_TIME_OUT = 20
EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0m' \
EXPERIMENT_SUCCESS_INFO = Fore.GREEN + 'Successfully started experiment!\n' + Fore.RESET + \
'-----------------------------------------------------------------------\n' \
'The experiment id is %s\n'\
'The Web UI urls are: %s\n' \
......@@ -94,11 +95,11 @@ TUNERS_NO_NEED_TO_IMPORT_DATA = {
'Hyperband'
}
COLOR_RED_FORMAT = '\033[1;31;31m%s\033[0m'
COLOR_RED_FORMAT = Fore.RED + '%s'
COLOR_GREEN_FORMAT = '\033[1;32;32m%s\033[0m'
COLOR_GREEN_FORMAT = Fore.GREEN + '%s'
COLOR_YELLOW_FORMAT = '\033[1;33;33m%s\033[0m'
COLOR_YELLOW_FORMAT = Fore.YELLOW + '%s'
SCHEMA_TYPE_ERROR = '%s should be %s type!'
......
......@@ -32,12 +32,13 @@ from .launcher_utils import validate_all_content
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response
from .url_utils import cluster_metadata_url, experiment_url, get_local_urls
from .config_utils import Config, Experiments
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process, detect_port
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process, detect_port, get_user, get_python_dir
from .constants import *
import random
import site
import time
from pathlib import Path
from .command_utils import check_output_command, kill_command
def get_log_path(config_file_name):
'''generate stdout and stderr log path'''
......@@ -49,14 +50,10 @@ def print_log_content(config_file_name):
'''print log information'''
stdout_full_path, stderr_full_path = get_log_path(config_file_name)
print_normal(' Stdout:')
stdout_cmds = ['cat', stdout_full_path]
stdout_content = check_output(stdout_cmds)
print(stdout_content.decode('utf-8'))
print(check_output_command(stdout_full_path))
print('\n\n')
print_normal(' Stderr:')
stderr_cmds = ['cat', stderr_full_path]
stderr_content = check_output(stderr_cmds)
print(stderr_content.decode('utf-8'))
print(check_output_command(stderr_full_path))
def get_nni_installation_path():
''' Find nni lib from the following locations in order
......@@ -67,7 +64,7 @@ def get_nni_installation_path():
Return None if nothing is found
'''
def _generate_installation_path(sitepackages_path):
python_dir = str(Path(sitepackages_path).parents[2])
python_dir = get_python_dir(sitepackages_path)
entry_file = os.path.join(python_dir, 'nni', 'main.js')
if os.path.isfile(entry_file):
return python_dir
......@@ -132,6 +129,10 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
log_header = LOG_HEADER % str(time_now)
stdout_file.write(log_header)
stderr_file.write(log_header)
if sys.platform == 'win32':
    from subprocess import CREATE_NEW_PROCESS_GROUP
    process = Popen(cmds, cwd=entry_dir, stdout=stdout_file, stderr=stderr_file, creationflags=CREATE_NEW_PROCESS_GROUP)
else:
    process = Popen(cmds, cwd=entry_dir, stdout=stdout_file, stderr=stderr_file)
return process, str(time_now)
......@@ -357,7 +358,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation
if experiment_config.get('useAnnotation'):
path = os.path.join(tempfile.gettempdir(), os.environ['USER'], 'nni', 'annotation')
path = os.path.join(tempfile.gettempdir(), get_user(), 'nni', 'annotation')
if not os.path.isdir(path):
os.makedirs(path)
path = tempfile.mkdtemp(dir=path)
......@@ -380,8 +381,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
print_error('Restful server start failed!')
print_log_content(config_file_name)
try:
cmds = ['kill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(1)
......@@ -395,8 +395,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
else:
print_error('Failed! Error is: {}'.format(err_msg))
try:
cmds = ['kill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(1)
......@@ -409,8 +408,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
else:
print_error('Set local config failed!')
try:
cmds = ['kill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(1)
......@@ -425,8 +423,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
if err_msg:
print_error('Failed! Error is: {}'.format(err_msg))
try:
cmds = ['kill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(1)
......@@ -441,8 +438,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
if err_msg:
print_error('Failed! Error is: {}'.format(err_msg))
try:
cmds = ['pkill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(1)
......@@ -457,8 +453,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
if err_msg:
print_error('Failed! Error is: {}'.format(err_msg))
try:
cmds = ['pkill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(1)
......@@ -477,8 +472,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
print_error('Start experiment failed!')
print_log_content(config_file_name)
try:
cmds = ['kill', str(rest_process.pid)]
call(cmds)
kill_command(rest_process.pid)
except Exception:
raise Exception(ERROR_INFO % 'Restful server stopped!')
exit(1)
......
......@@ -27,6 +27,8 @@ from .nnictl_utils import *
from .package_management import *
from .constants import *
from .tensorboard_utils import *
from colorama import init
init(autoreset=True)
if os.environ.get('COVERAGE_PROCESS_START'):
import coverage
......
......@@ -24,7 +24,6 @@ import psutil
import json
import datetime
import time
from subprocess import call, check_output
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .config_utils import Config, Experiments
......@@ -32,6 +31,7 @@ from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT
from .common_utils import print_normal, print_error, print_warning, detect_process
from .command_utils import check_output_command, kill_command
def get_experiment_time(port):
'''get the startTime and endTime of an experiment'''
......@@ -219,14 +219,12 @@ def stop_experiment(args):
rest_port = nni_config.get_config('restServerPort')
rest_pid = nni_config.get_config('restServerPid')
if rest_pid:
stop_rest_cmds = ['kill', str(rest_pid)]
call(stop_rest_cmds)
kill_command(rest_pid)
tensorboard_pid_list = nni_config.get_config('tensorboardPidList')
if tensorboard_pid_list:
for tensorboard_pid in tensorboard_pid_list:
try:
cmds = ['kill', '-9', str(tensorboard_pid)]
call(cmds)
kill_command(tensorboard_pid)
except Exception as exception:
print_error(exception)
nni_config.set_config('tensorboardPidList', [])
......@@ -303,14 +301,6 @@ def experiment_status(args):
else:
print(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':')))
def get_log_content(file_name, cmds):
'''use cmds to read config content'''
if os.path.exists(file_name):
rest = check_output(cmds)
print(rest.decode('utf-8'))
else:
print_normal('NULL!')
def log_internal(args, filetype):
'''internal function to call get_log_content'''
file_name = get_config_filename(args)
......@@ -318,14 +308,7 @@ def log_internal(args, filetype):
file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stdout')
else:
file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stderr')
if args.head:
get_log_content(file_full_path, ['head', '-' + str(args.head), file_full_path])
elif args.tail:
get_log_content(file_full_path, ['tail', '-' + str(args.tail), file_full_path])
elif args.path:
print_normal('The path of stdout file is: ' + file_full_path)
else:
get_log_content(file_full_path, ['cat', file_full_path])
print(check_output_command(file_full_path, head=args.head, tail=args.tail))
def log_stdout(args):
'''get stdout log'''
......
......@@ -20,17 +20,18 @@
import nni
import os
import sys
from subprocess import call
from .constants import PACKAGE_REQUIREMENTS
from .common_utils import print_normal, print_error
from .command_utils import install_requirements_command
def process_install(package_name):
if PACKAGE_REQUIREMENTS.get(package_name) is None:
print_error('{0} is not supported!'.format(package_name))
else:
requirements_path = os.path.join(nni.__path__[0], PACKAGE_REQUIREMENTS[package_name])
cmds = 'cd ' + requirements_path + ' && python3 -m pip install --user -r requirements.txt'
call(cmds, shell=True)
install_requirements_command(requirements_path)
def package_install(args):
'''install packages'''
......
......@@ -21,14 +21,14 @@
import os
from .common_utils import print_error
from subprocess import call
from .command_utils import install_package_command
def check_environment():
'''check if paramiko is installed'''
try:
import paramiko
except:
cmds = 'python3 -m pip install --user paramiko'
call(cmds, shell=True)
install_package_command('paramiko')
def copy_remote_directory_to_local(sftp, remote_path, local_path):
'''copy remote directory to local machine'''
......
......@@ -25,6 +25,9 @@ import time
from xml.dom import minidom
def check_ready_to_run():
#TODO check process in windows
if sys.platform == 'win32':
    return True
pgrep_output =subprocess.check_output('pgrep -fx \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
pidList = []
for pid in pgrep_output.splitlines():
......