Merge pull request #244 from microsoft/master

merge master

Merge pull request #244 from microsoft/master
merge master
e29b58a1 · SparkSnail · GitHub · e0c2c0eb · 4f88be1f · e0c2c0eb
Unverified Commit e29b58a1 authored Apr 30, 2020 by SparkSnail Committed by GitHub Apr 30, 2020
16 changed files
--- a/src/nni_manager/training_service/remote_machine/sshClientUtility.ts
+++ b/src/nni_manager/training_service/remote_machine/sshClientUtility.ts
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-'use strict';
-import * as assert from 'assert';
-import * as os from 'os';
-import * as path from 'path';
-import { Client, ClientChannel, SFTPWrapper } from 'ssh2';
-import * as stream from 'stream';
-import { Deferred } from 'ts-deferred';
-import { NNIError, NNIErrorNames } from '../../common/errors';
-import { getLogger, Logger } from '../../common/log';
-import { getRemoteTmpDir, uniqueString, unixPathJoin } from '../../common/utils';
-import { execRemove, tarAdd } from '../common/util';
-import { RemoteCommandResult } from './remoteMachineData';
-/**
- *
- * Utility for frequent operations towards SSH client
- *
- */
-export namespace SSHClientUtility {
-    /**
-     * Copy local file to remote path
-     * @param localFilePath the path of local file
-     * @param remoteFilePath the target path in remote machine
-     * @param sshClient SSH Client
-     */
-    export function copyFileToRemote(localFilePath: string, remoteFilePath: string, sshClient: Client): Promise<boolean> {
-        const log: Logger = getLogger();
-        log.debug(`copyFileToRemote: localFilePath: ${localFilePath}, remoteFilePath: ${remoteFilePath}`);
-        assert(sshClient !== undefined);
-        const deferred: Deferred<boolean> = new Deferred<boolean>();
-        sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
-            if (err !== undefined && err !== null) {
-                log.error(`copyFileToRemote: ${err.message}, ${localFilePath}, ${remoteFilePath}`);
-                deferred.reject(err);
-                return;
-            }
-            assert(sftp !== undefined);
-            sftp.fastPut(localFilePath, remoteFilePath, (fastPutErr: Error) => {
-                sftp.end();
-                if (fastPutErr !== undefined && fastPutErr !== null) {
-                    deferred.reject(fastPutErr);
-                } else {
-                    deferred.resolve(true);
-                }
-            });
-        });
-        return deferred.promise;
-    }
-    /**
-     * Execute command on remote machine
-     * @param command the command to execute remotely
-     * @param client SSH Client
-     */
-    export function remoteExeCommand(command: string, client: Client): Promise<RemoteCommandResult> {
-        const log: Logger = getLogger();
-        log.debug(`remoteExeCommand: command: [${command}]`);
-        const deferred: Deferred<RemoteCommandResult> = new Deferred<RemoteCommandResult>();
-        let stdout: string = '';
-        let stderr: string = '';
-        let exitCode: number;
-        client.exec(command, (err: Error, channel: ClientChannel) => {
-            if (err !== undefined && err !== null) {
-                log.error(`remoteExeCommand: ${err.message}`);
-                deferred.reject(err);
-                return;
-            }
-            channel.on('data', (data: any, dataStderr: any) => {
-                if (dataStderr !== undefined && dataStderr !== null) {
-                    stderr += data.toString();
-                } else {
-                    stdout += data.toString();
-                }
-            })
-              .on('exit', (code: any, signal: any) => {
-                exitCode = <number>code;
-                deferred.resolve({
-                    stdout : stdout,
-                    stderr : stderr,
-                    exitCode : exitCode
-                });
-            });
-        });
-        return deferred.promise;
-    }
-    /**
-     * Copy files and directories in local directory recursively to remote directory
-     * @param localDirectory local diretory
-     * @param remoteDirectory remote directory
-     * @param sshClient SSH client
-     */
-    export async function copyDirectoryToRemote(localDirectory: string, remoteDirectory: string, sshClient: Client, remoteOS: string): Promise<void> {
-        const tmpSuffix: string = uniqueString(5);
-        const localTarPath: string = path.join(os.tmpdir(), `nni_tmp_local_${tmpSuffix}.tar.gz`);
-        const remoteTarPath: string = unixPathJoin(getRemoteTmpDir(remoteOS), `nni_tmp_remote_${tmpSuffix}.tar.gz`);
-        // Compress files in local directory to experiment root directory
-        await tarAdd(localTarPath, localDirectory);
-        // Copy the compressed file to remoteDirectory and delete it
-        await copyFileToRemote(localTarPath, remoteTarPath, sshClient);
-        await execRemove(localTarPath);
-        // Decompress the remote compressed file in and delete it
-        await remoteExeCommand(`tar -oxzf ${remoteTarPath} -C ${remoteDirectory}`, sshClient);
-        await remoteExeCommand(`rm ${remoteTarPath}`, sshClient);
-    }
-    export function getRemoteFileContent(filePath: string, sshClient: Client): Promise<string> {
-        const deferred: Deferred<string> = new Deferred<string>();
-        sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
-            if (err !== undefined && err !== null) {
-                getLogger()
-                  .error(`getRemoteFileContent: ${err.message}`);
-                deferred.reject(new Error(`SFTP error: ${err.message}`));
-                return;
-            }
-            try {
-                const sftpStream: stream.Readable = sftp.createReadStream(filePath);
-                let dataBuffer: string = '';
-                sftpStream.on('data', (data: Buffer | string) => {
-                    dataBuffer += data;
-                })
-                  .on('error', (streamErr: Error) => {
-                    sftp.end();
-                    deferred.reject(new NNIError(NNIErrorNames.NOT_FOUND, streamErr.message));
-                })
-                  .on('end', () => {
-                    // sftp connection need to be released manually once operation is done
-                    sftp.end();
-                    deferred.resolve(dataBuffer);
-                });
-            } catch (error) {
-                getLogger()
-                  .error(`getRemoteFileContent: ${error.message}`);
-                sftp.end();
-                deferred.reject(new Error(`SFTP error: ${error.message}`));
-            }
-        });
-        return deferred.promise;
-    }
-}
--- a/src/nni_manager/training_service/remote_machine/test/linuxCommands.test.ts
+++ b/src/nni_manager/training_service/remote_machine/test/linuxCommands.test.ts
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+'use strict';
+import * as chai from 'chai';
+import * as chaiAsPromised from 'chai-as-promised';
+import * as component from '../../../common/component';
+import { cleanupUnitTest, prepareUnitTest } from '../../../common/utils';
+import { LinuxCommands } from '../extends/linuxCommands';
+// import { TrialConfigMetadataKey } from '../trialConfigMetadataKey';
+describe('Unit Test for linuxCommands', () => {
+    let linuxCommands: LinuxCommands
+    before(() => {
+        chai.should();
+        chai.use(chaiAsPromised);
+        prepareUnitTest();
+    });
+    after(() => {
+        cleanupUnitTest();
+    });
+    beforeEach(() => {
+        linuxCommands = component.get(LinuxCommands);
+    });
+    afterEach(() => {
+    });
+    it('joinPath', async () => {
+        chai.expect(linuxCommands.joinPath("/root/", "/first")).to.equal("/root/first");
+        chai.expect(linuxCommands.joinPath("/root", "first")).to.equal("/root/first");
+        chai.expect(linuxCommands.joinPath("/root/", "first")).to.equal("/root/first");
+        chai.expect(linuxCommands.joinPath("root/", "first")).to.equal("root/first");
+        chai.expect(linuxCommands.joinPath("root/")).to.equal("root/");
+        chai.expect(linuxCommands.joinPath("root")).to.equal("root");
+        chai.expect(linuxCommands.joinPath("./root")).to.equal("./root");
+        chai.expect(linuxCommands.joinPath("")).to.equal(".");
+        chai.expect(linuxCommands.joinPath("..")).to.equal("..");
+    })
+    it('createFolder', async () => {
+        chai.expect(linuxCommands.createFolder("test")).to.equal("mkdir -p 'test'");
+        chai.expect(linuxCommands.createFolder("test", true)).to.equal("umask 0; mkdir -p 'test'");
+    })
+    it('allowPermission', async () => {
+        chai.expect(linuxCommands.allowPermission(true, "test", "test1")).to.equal("chmod 777 -R 'test' 'test1'");
+        chai.expect(linuxCommands.allowPermission(false, "test")).to.equal("chmod 777 'test'");
+    })
+    it('removeFolder', async () => {
+        chai.expect(linuxCommands.removeFolder("test")).to.equal("rm -df 'test'");
+        chai.expect(linuxCommands.removeFolder("test", true)).to.equal("rm -rf 'test'");
+        chai.expect(linuxCommands.removeFolder("test", true, false)).to.equal("rm -r 'test'");
+        chai.expect(linuxCommands.removeFolder("test", false, false)).to.equal("rm 'test'");
+    })
+    it('removeFiles', async () => {
+        chai.expect(linuxCommands.removeFiles("test", "*.sh")).to.equal("rm 'test/*.sh'");
+        chai.expect(linuxCommands.removeFiles("test", "")).to.equal("rm 'test'");
+    })
+    it('readLastLines', async () => {
+        chai.expect(linuxCommands.readLastLines("test", 3)).to.equal("tail -n 3 'test'");
+    })
+    it('isProcessAlive', async () => {
+        chai.expect(linuxCommands.isProcessAliveCommand("test")).to.equal("kill -0 `cat 'test'`");
+        chai.expect(linuxCommands.isProcessAliveProcessOutput(
+            {
+                exitCode: 0,
+                stdout: "",
+                stderr: ""
+            }
+        )).to.equal(true);
+        chai.expect(linuxCommands.isProcessAliveProcessOutput(
+            {
+                exitCode: 10,
+                stdout: "",
+                stderr: ""
+            }
+        )).to.equal(false);
+    })
+    it('killChildProcesses', async () => {
+        chai.expect(linuxCommands.killChildProcesses("test")).to.equal("pkill -P `cat 'test'`");
+    })
+    it('extractFile', async () => {
+        chai.expect(linuxCommands.extractFile("test.tar", "testfolder")).to.equal("tar -oxzf 'test.tar' -C 'testfolder'");
+    })
+    it('executeScript', async () => {
+        chai.expect(linuxCommands.executeScript("test.sh", true)).to.equal("bash 'test.sh'");
+        chai.expect(linuxCommands.executeScript("test script'\"", false)).to.equal(`bash -c \"test script'\\""`);
+    })
+});
--- a/src/nni_manager/training_service/remote_machine/test/shellExecutor.test.ts
+++ b/src/nni_manager/training_service/remote_machine/test/shellExecutor.test.ts
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+'use strict';
+import * as cpp from 'child-process-promise';
+import * as fs from 'fs';
+import * as chai from 'chai';
+import * as chaiAsPromised from 'chai-as-promised';
+import { Client } from 'ssh2';
+import { ShellExecutor } from '../shellExecutor';
+import { prepareUnitTest, cleanupUnitTest } from '../../../common/utils';
+const LOCALFILE: string = '/tmp/localSshclientUTData';
+const REMOTEFILE: string = '/tmp/remoteSshclientUTData';
+const REMOTEFOLDER: string = '/tmp/remoteSshclientUTFolder';
+async function copyFile(executor: ShellExecutor): Promise<void> {
+    await executor.copyFileToRemote(LOCALFILE, REMOTEFILE);
+}
+async function copyFileToRemoteLoop(executor: ShellExecutor): Promise<void> {
+    for (let i: number = 0; i < 10; i++) {
+        // console.log(i);
+        await executor.copyFileToRemote(LOCALFILE, REMOTEFILE);
+    }
+}
+async function getRemoteFileContentLoop(executor: ShellExecutor): Promise<void> {
+    for (let i: number = 0; i < 10; i++) {
+        // console.log(i);
+        await executor.getRemoteFileContent(REMOTEFILE);
+    }
+}
+describe('ShellExecutor test', () => {
+    let skip: boolean = false;
+    let rmMeta: any;
+    try {
+        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
+        console.log(rmMeta);
+    } catch (err) {
+        console.log(`Please configure rminfo.json to enable remote machine test.${err}`);
+        skip = true;
+    }
+    before(async () => {
+        chai.should();
+        chai.use(chaiAsPromised);
+        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
+        prepareUnitTest();
+    });
+    after(() => {
+        cleanupUnitTest();
+        fs.unlinkSync(LOCALFILE);
+    });
+    it('Test mkdir', async () => {
+        if (skip) {
+            return;
+        }
+        const shellExecutor: ShellExecutor = new ShellExecutor();
+        await shellExecutor.initialize(rmMeta);
+        let result = await shellExecutor.createFolder(REMOTEFOLDER, false);
+        chai.expect(result).eq(true);
+        result = await shellExecutor.removeFolder(REMOTEFOLDER);
+        chai.expect(result).eq(true);
+    });
+    it('Test ShellExecutor', async () => {
+        if (skip) {
+            return;
+        }
+        const shellExecutor: ShellExecutor = new ShellExecutor();
+        await shellExecutor.initialize(rmMeta);
+        await copyFile(shellExecutor);
+        await Promise.all([
+            copyFileToRemoteLoop(shellExecutor),
+            copyFileToRemoteLoop(shellExecutor),
+            copyFileToRemoteLoop(shellExecutor),
+            getRemoteFileContentLoop(shellExecutor)
+        ]);
+    });
+});
--- a/src/nni_manager/training_service/test/sshClientUtility.test.ts
+++ b/src/nni_manager/training_service/test/sshClientUtility.test.ts
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-'use strict';
-import * as cpp from 'child-process-promise';
-import * as fs from 'fs';
-import { Client } from 'ssh2';
-import { Deferred } from 'ts-deferred';
-import { SSHClientUtility } from '../remote_machine/sshClientUtility';
-const LOCALFILE: string = '/tmp/sshclientUTData';
-const REMOTEFILE: string = '/tmp/sshclientUTData';
-async function copyFile(conn: Client): Promise<void> {
-    const deferred: Deferred<void> = new Deferred<void>();
-    conn.sftp((err, sftp) => {
-        if (err) {
-            deferred.reject(err);
-            return;
-        }
-        sftp.fastPut(
-            LOCALFILE,
-            REMOTEFILE, (fastPutErr: Error) => {
-                sftp.end();
-                if (fastPutErr) {
-                    deferred.reject(fastPutErr);
-                } else {
-                    deferred.resolve();
-                }
-            }
-        );
-    });
-    return deferred.promise;
-}
-async function copyFileToRemoteLoop(conn: Client): Promise<void> {
-    for (let i: number = 0; i < 500; i++) {
-        console.log(i);
-        await SSHClientUtility.copyFileToRemote(LOCALFILE, REMOTEFILE, conn);
-    }
-}
-async function remoteExeCommandLoop(conn: Client): Promise<void> {
-    for (let i: number = 0; i < 500; i++) {
-        console.log(i);
-        await SSHClientUtility.remoteExeCommand('ls', conn);
-    }
-}
-async function getRemoteFileContentLoop(conn: Client): Promise<void> {
-    for (let i: number = 0; i < 500; i++) {
-        console.log(i);
-        await SSHClientUtility.getRemoteFileContent(REMOTEFILE, conn);
-    }
-}
-describe('sshClientUtility test', () => {
-    let skip: boolean = true;
-    let rmMeta: any;
-    try {
-        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
-    } catch (err) {
-        skip = true;
-    }
-    before(async () => {
-        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
-    });
-    after(() => {
-        fs.unlinkSync(LOCALFILE);
-    });
-    it('Test SSHClientUtility', (done) => {
-        if (skip) {
-            done();
-            return;
-        }
-        const conn: Client = new Client();
-        conn.on('ready', async () => {
-            await copyFile(conn);
-            await Promise.all([
-                copyFileToRemoteLoop(conn),
-                copyFileToRemoteLoop(conn),
-                copyFileToRemoteLoop(conn),
-                remoteExeCommandLoop(conn),
-                getRemoteFileContentLoop(conn)
-            ]);
-            done();
-        }).connect(rmMeta);
-    });
-});
--- a/src/sdk/pynni/nni/nas/pytorch/base_mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/base_mutator.py
@@ -104,7 +104,7 @@ class BaseMutator(nn.Module):
        """
        pass
-    def on_forward_layer_choice(self, mutable, *inputs):
+    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        Callbacks of forward in LayerChoice.
@@ -112,8 +112,10 @@ class BaseMutator(nn.Module):
        ----------
        mutable : LayerChoice
            Module whose forward is called.
-        inputs : list of torch.Tensor
+        args : list of torch.Tensor
            The arguments of its forward function.
+        kwargs : dict
+            The keyword arguments of its forward function.
        Returns
        -------

--- a/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
@@ -203,7 +203,7 @@ class ClassicMutator(Mutator):
            # for now we only generate flattened search space
            if isinstance(mutable, LayerChoice):
                key = mutable.key
-                val = [repr(choice) for choice in mutable.choices]
+                val = mutable.names
                search_space[key] = {"_type": LAYER_CHOICE, "_value": val}
            elif isinstance(mutable, InputChoice):
                key = mutable.key

--- a/src/sdk/pynni/nni/nas/pytorch/mutables.py
+++ b/src/sdk/pynni/nni/nas/pytorch/mutables.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 import logging
+from collections import OrderedDict
 import torch.nn as nn
@@ -58,9 +59,6 @@ class Mutable(nn.Module):
                               "Or did you apply multiple fixed architectures?")
        self.__dict__["mutator"] = mutator
-    def forward(self, *inputs):
-        raise NotImplementedError
    @property
    def key(self):
        """
@@ -86,9 +84,6 @@ class Mutable(nn.Module):
                "Or did you initialize a mutable on the fly in forward pass? Move to `__init__` "
                "so that trainer can locate all your mutables. See NNI docs for more details.".format(self))
-    def __repr__(self):
-        return "{} ({})".format(self.name, self.key)
 class MutableScope(Mutable):
    """
@@ -131,7 +126,7 @@ class LayerChoice(Mutable):
    Parameters
    ----------
-    op_candidates : list of nn.Module
+    op_candidates : list of nn.Module or OrderedDict
        A module list to be selected from.
    reduction : str
        ``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected.
@@ -146,23 +141,53 @@ class LayerChoice(Mutable):
    ----------
    length : int
        Number of ops to choose from.
+    names : list of str
+        Names of candidates.
+    Notes
+    -----
+    ``op_candidates`` can be a list of modules or an ordered dict of named modules, for example,
+    .. code-block:: python
+        self.op_choice = LayerChoice(OrderedDict([
+            ("conv3x3", nn.Conv2d(3, 16, 3)),
+            ("conv5x5", nn.Conv2d(3, 16, 5)),
+            ("conv7x7", nn.Conv2d(3, 16, 7))
+        ]))
    """
    def __init__(self, op_candidates, reduction="sum", return_mask=False, key=None):
        super().__init__(key=key)
        self.length = len(op_candidates)
-        self.choices = nn.ModuleList(op_candidates)
+        self.choices = []
+        self.names = []
+        if isinstance(op_candidates, OrderedDict):
+            for name, module in op_candidates.items():
+                assert name not in ["length", "reduction", "return_mask", "_key", "key", "names"], \
+                    "Please don't use a reserved name '{}' for your module.".format(name)
+                self.add_module(name, module)
+                self.choices.append(module)
+                self.names.append(name)
+        elif isinstance(op_candidates, list):
+            for i, module in enumerate(op_candidates):
+                self.add_module(str(i), module)
+                self.choices.append(module)
+                self.names.append(str(i))
+        else:
+            raise TypeError("Unsupported op_candidates type: {}".format(type(op_candidates)))
        self.reduction = reduction
        self.return_mask = return_mask
-    def forward(self, *inputs):
+    def forward(self, *args, **kwargs):
        """
        Returns
        -------
        tuple of tensors
            Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
        """
-        out, mask = self.mutator.on_forward_layer_choice(self, *inputs)
+        out, mask = self.mutator.on_forward_layer_choice(self, *args, **kwargs)
        if self.return_mask:
            return out, mask
        return out

--- a/src/sdk/pynni/nni/nas/pytorch/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/mutator.py
@@ -128,7 +128,7 @@ class Mutator(BaseMutator):
            result["mutable"][mutable.key].append(path)
        return result
-    def on_forward_layer_choice(self, mutable, *inputs):
+    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        On default, this method retrieves the decision obtained previously, and select certain operations.
        Only operations with non-zero weight will be executed. The results will be added to a list.
@@ -138,7 +138,9 @@ class Mutator(BaseMutator):
        ----------
        mutable : LayerChoice
            Layer choice module.
-        inputs : list of torch.Tensor
+        args : list of torch.Tensor
+            Inputs
+        kwargs : dict
            Inputs
        Returns
@@ -148,16 +150,16 @@ class Mutator(BaseMutator):
        """
        if self._connect_all:
            return self._all_connect_tensor_reduction(mutable.reduction,
-                                                      [op(*inputs) for op in mutable.choices]), \
+                                                      [op(*args, **kwargs) for op in mutable.choices]), \
                torch.ones(mutable.length)
-        def _map_fn(op, *inputs):
+        def _map_fn(op, args, kwargs):
-            return op(*inputs)
+            return op(*args, **kwargs)
        mask = self._get_decision(mutable)
        assert len(mask) == len(mutable.choices), \
            "Invalid mask, expected {} to be of length {}.".format(mask, len(mutable.choices))
-        out = self._select_with_mask(_map_fn, [(choice, *inputs) for choice in mutable.choices], mask)
+        out = self._select_with_mask(_map_fn, [(choice, args, kwargs) for choice in mutable.choices], mask)
        return self._tensor_reduction(mutable.reduction, out), mask
    def on_forward_input_choice(self, mutable, tensor_list):

--- a/src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py
@@ -317,7 +317,7 @@ class ProxylessNasMutator(BaseMutator):
            self.mutable_list.append(mutable)
            mutable.registered_module = MixedOp(mutable)
-    def on_forward_layer_choice(self, mutable, *inputs):
+    def on_forward_layer_choice(self, mutable, *args, **kwargs):
        """
        Callback of layer choice forward. This function defines the forward
        logic of the input mutable. So mutable is only interface, its real
@@ -327,7 +327,9 @@ class ProxylessNasMutator(BaseMutator):
        ----------
        mutable: LayerChoice
            forward logic of this input mutable
-        inputs: list of torch.Tensor
+        args: list of torch.Tensor
+            inputs of this mutable
+        kwargs: dict
            inputs of this mutable
        Returns
@@ -339,7 +341,7 @@ class ProxylessNasMutator(BaseMutator):
        """
        # FIXME: return mask, to be consistent with other algorithms
        idx = mutable.registered_module.active_op_index
-        return mutable.registered_module(mutable, *inputs), idx
+        return mutable.registered_module(mutable, *args, **kwargs), idx
    def reset_binary_gates(self):
        """

--- a/src/webui/yarn.lock
+++ b/src/webui/yarn.lock
@@ -5593,7 +5593,7 @@ load-json-file@^4.0.0:
    pify "^3.0.0"
    strip-bom "^3.0.0"
-loader-fs-cache@>=1.0.3, loader-fs-cache@^1.0.0:
+loader-fs-cache@^1.0.0:
  version "1.0.3"
  resolved "https://registry.yarnpkg.com/loader-fs-cache/-/loader-fs-cache-1.0.3.tgz#f08657646d607078be2f0a032f8bd69dd6f277d9"
  integrity sha512-ldcgZpjNJj71n+2Mf6yetz+c9bM4xpKtNds4LbqXzU/PTdeAX0g3ytnU1AJMEcTk2Lex4Smpe3Q/eCTsvUBxbA==

--- a/test/config/integration_tests.yml
+++ b/test/config/integration_tests.yml
@@ -77,6 +77,14 @@ testCases:
    kwargs:
      expected_result_file: expected_metrics.json
+- name: export-float
+  configFile: test/config/metrics_test/config.yml
+  config:
+    maxTrialNum: 1
+    trialConcurrency: 1
+  validator:
+    class: ExportValidator 
 - name: metrics-dict
  configFile: test/config/metrics_test/config_dict_metrics.yml
  config:
@@ -87,6 +95,14 @@ testCases:
    kwargs:
      expected_result_file: expected_metrics_dict.json
+- name: export-dict
+  configFile: test/config/metrics_test/config_dict_metrics.yml
+  config:
+    maxTrialNum: 1
+    trialConcurrency: 1
+  validator:
+    class: ExportValidator 
 - name: nnicli
  configFile: test/config/examples/sklearn-regression.yml
  config:

--- a/test/nni_test/nnitest/validators.py
+++ b/test/nni_test/nnitest/validators.py
@@ -2,6 +2,8 @@
 # Licensed under the MIT license.
 import os.path as osp
+from os import remove
+import subprocess
 import json
 import requests
 import nnicli as nc
@@ -12,6 +14,24 @@ class ITValidator:
    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
        pass
+class ExportValidator(ITValidator):
+    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
+        exp_id = osp.split(experiment_dir)[-1]
+        proc1 = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", "csv", "-f", "report.csv"])
+        assert proc1.returncode == 0, '`nnictl experiment export -t csv` failed with code %d' % proc1.returncode
+        with open("report.csv", 'r') as f:
+            print('Exported CSV file: \n')
+            print(''.join(f.readlines()))
+            print('\n\n')
+        remove('report.csv')
+        proc2 = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", "json", "-f", "report.json"])
+        assert proc2.returncode == 0, '`nnictl experiment export -t json` failed with code %d' % proc2.returncode
+        with open("report.json", 'r') as f:
+            print('Exported JSON file: \n')
+            print('\n'.join(f.readlines()))
+            print('\n\n')
+        remove('report.json')
 class MetricsValidator(ITValidator):
    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):

--- a/test/pipelines/pipelines-it-pai-windows.yml
+++ b/test/pipelines/pipelines-it-pai-windows.yml
@@ -70,5 +70,5 @@ jobs:
      python --version
      mount -o anon $(pai_nfs_uri) $(local_nfs_uri)
      python nni_test/nnitest/generate_ts_config.py --ts pai --pai_token $(pai_token) --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $(docker_image) --pai_storage_plugin $(pai_storage_plugin) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
-      python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase
+      python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
    displayName: 'Examples and advanced features tests on pai'
\ No newline at end of file
--- a/test/pipelines/pipelines-it-pai.yml
+++ b/test/pipelines/pipelines-it-pai.yml
@@ -57,5 +57,5 @@ jobs:
      cd test
      python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
       --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
-      PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase
+      PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
    displayName: 'integration test'
--- a/tools/nni_cmd/nnictl_utils.py
+++ b/tools/nni_cmd/nnictl_utils.py
@@ -699,12 +699,13 @@ def export_trials_data(args):
                content = json.loads(response.text)
                trial_records = []
                for record in content:
-                    if not isinstance(record['value'], (float, int)):
+                    record_value = json.loads(record['value'])
-                        formated_record = {**record['parameter'], **record['value'], **{'id': record['id']}}
+                    if not isinstance(record_value, (float, int)):
+                        formated_record = {**record['parameter'], **record_value, **{'id': record['id']}}
                    else:
-                        formated_record = {**record['parameter'], **{'reward': record['value'], 'id': record['id']}}
+                        formated_record = {**record['parameter'], **{'reward': record_value, 'id': record['id']}}
                    trial_records.append(formated_record)
-                with open(args.path, 'w') as file:
+                with open(args.path, 'w', newline='') as file:
                    writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records]))
                    writer.writeheader()
                    writer.writerows(trial_records)

--- a/tools/nni_gpu_tool/gpu_metrics_collector.py
+++ b/tools/nni_gpu_tool/gpu_metrics_collector.py
@@ -10,27 +10,31 @@ import traceback
 from xml.dom import minidom
 def check_ready_to_run():
    if sys.platform == 'win32':
        pgrep_output = subprocess.check_output(
            'wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\' and name like \'%python%\'" get processId')
        pidList = pgrep_output.decode("utf-8").strip().split()
-        pidList.pop(0) # remove the key word 'ProcessId'
+        pidList.pop(0)  # remove the key word 'ProcessId'
        pidList = list(map(int, pidList))
        pidList.remove(os.getpid())
        return not pidList
    else:
-        pgrep_output = subprocess.check_output('pgrep -fxu "$(whoami)" \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
+        pgrep_output = subprocess.check_output('pgrep -afu "$(whoami)" \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
        pidList = []
        for pid in pgrep_output.splitlines():
-            pidList.append(int(pid))
+            pid = pid.decode()
-        pidList.remove(os.getpid())
+            if "pgrep " in pid or pid.startswith('%s ' % os.getpid()) or pid.startswith('%s ' % os.getppid()):
+                continue
+            pidList.append(pid)
        return not pidList
 def main(argv):
    metrics_output_dir = os.environ['METRIC_OUTPUT_DIR']
    if check_ready_to_run() == False:
-        # GPU metrics collector is already running. Exit
+        print("GPU metrics collector is already running. exiting...")
        exit(2)
    cmd = 'nvidia-smi -q -x'.split()
    while(True):
@@ -44,6 +48,7 @@ def main(argv):
        # TODO: change to sleep time configurable via arguments
        time.sleep(5)
 def parse_nvidia_smi_result(smi, outputDir):
    try:
        old_umask = os.umask(0)
@@ -70,13 +75,14 @@ def parse_nvidia_smi_result(smi, outputDir):
                outPut["gpuInfos"].append(gpuInfo)
            print(outPut)
            outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True)))
-            outputFile.flush();
+            outputFile.flush()
-    except:
+    except Exception as error:
        # e_info = sys.exc_info()
-        print('xmldoc paring error')
+        print('gpu_metrics_collector error: %s' % error)
    finally:
        os.umask(old_umask)
 def gen_empty_gpu_metric(outputDir):
    try:
        old_umask = os.umask(0)