// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import fs from 'fs';
import path from 'path';
import tkill from 'tree-kill';
import * as component from 'common/component';
import { getLogger, Logger } from 'common/log';
import { ExperimentConfig } from 'common/experimentConfig';
import { ExperimentStartupInfo } from 'common/experimentStartupInfo';
11
import { powershellString, createScriptFile } from 'common/shellUtils';
12
import { EnvironmentInformation, EnvironmentService } from '../environment';
13
14
import { isAlive, getNewLine } from 'common/utils';
import { execMkdir, runScript, getScriptName, execCopydir } from 'training_service/common/util';
15
import { SharedStorageService } from '../sharedStorage'
16
17
18
19

/**
 * Environment service that launches the trial runner as a process on the local machine.
 *
 * Each environment gets a working folder `<experimentRootDir>/envs/<environment.id>`.
 * The generated launch script makes the runner write its process id to a `pid` file and,
 * once the runner exits, writes `<exit code> <timestamp>` to a `code` file in that folder.
 * `refreshEnvironmentsStatus` reads those two files to derive the environment status.
 */
@component.Singleton
export class LocalEnvironmentService extends EnvironmentService {

    private readonly log: Logger = getLogger('LocalEnvironmentService');
    // Not readonly: startEnvironment() redirects it to the shared storage working root
    // when the shared storage can be mounted locally.
    private experimentRootDir: string;
    private experimentId: string;

    /**
     * @param _config experiment configuration (unused by this service)
     * @param info startup info providing the experiment id and the log directory,
     *             which is used as the initial experiment root directory
     */
    constructor(_config: ExperimentConfig, info: ExperimentStartupInfo) {
        super();
        this.experimentId = info.experimentId;
        this.experimentRootDir = info.logDir;
    }

    /** Interval of the environment maintenance loop (presumably milliseconds; confirm against the caller). */
    public get environmentMaintenceLoopInterval(): number {
        return 100;
    }

    /** This service reports that it has no storage service. */
    public get hasStorageService(): boolean {
        return false;
    }

    /** Name identifying this environment service. */
    public get getName(): string {
        return 'local';
    }

    /**
     * Refresh the status of every given environment from the `pid` and `code` files
     * in its runner working folder.
     *
     * - No `pid` file yet: the environment is left untouched.
     * - Otherwise the status is set to 'RUNNING'; if the recorded process is no longer
     *   alive and the `code` file holds `<exit code> <timestamp>`, the status becomes
     *   'SUCCEEDED' for exit code 0 and 'FAILED' for anything else.
     *
     * Errors for a single environment are logged and do not affect the others.
     * The returned promise resolves only after all environments have been processed.
     *
     * @param environments environments whose status should be refreshed in place
     */
    public async refreshEnvironmentsStatus(environments: EnvironmentInformation[]): Promise<void> {
        // Array.forEach ignores the promises returned by an async callback, so the
        // method used to resolve before any status was updated. Map + Promise.all
        // keeps the per-environment work concurrent but actually waits for it.
        await Promise.all(environments.map(async (environment): Promise<void> => {
            const jobpidPath: string = path.join(environment.runnerWorkingFolder, 'pid');
            const runnerReturnCodeFilePath: string = path.join(environment.runnerWorkingFolder, 'code');
            /* eslint-disable require-atomic-updates */
            try {
                // The runner has not written its pid file yet: nothing to refresh.
                if (!fs.existsSync(jobpidPath)) {
                    return;
                }
                const pid: string = await fs.promises.readFile(jobpidPath, 'utf8');
                const alive: boolean = await isAlive(pid);
                environment.status = 'RUNNING';
                // If the runner process is gone, its exit code decides the final status.
                // Without a readable code file the status stays 'RUNNING'.
                if (!alive && fs.existsSync(runnerReturnCodeFilePath)) {
                    const runnerReturnCode: string = await fs.promises.readFile(runnerReturnCodeFilePath, 'utf8');
                    // Expected content: "<exit code> <timestamp>". The sign is outside the
                    // capture group, which is fine because only zero / non-zero matters.
                    const match: RegExpMatchArray | null = runnerReturnCode.trim()
                        .match(/^-?(\d+)\s+(\d+)$/);
                    if (match !== null) {
                        const exitCode: number = parseInt(match[1], 10);
                        environment.setStatus(exitCode === 0 ? 'SUCCEEDED' : 'FAILED');
                    }
                }
            } catch (error) {
                const message: string = error instanceof Error ? error.message : String(error);
                this.log.error(`Update job status exception, error is ${message}`);
            }
        }));
    }

    /**
     * Build the lines of the launch script for an environment.
     *
     * The script changes into the experiment root, starts the trial runner with a
     * `--job_pid_file` pointing into the runner working folder, redirects its output
     * to log files there, and finally writes `<exit code> <timestamp>` to the `code` file.
     *
     * Side effect: on Windows `environment.command` is overwritten with the
     * trial runner module invocation before it is embedded in the script.
     *
     * @param environment environment to launch; `runnerWorkingFolder` must already be set
     * @returns script lines, to be joined with the platform newline
     */
    private getScript(environment: EnvironmentInformation): string[] {
        const script: string[] = [];
        if (process.platform === 'win32') {
            script.push(`$env:PATH=${powershellString(process.env['path']!)}`)
            // Was `cd $env:<path>`: an absolute path after the `$env:` prefix is not a
            // valid location, so the relative `cd envs\<id>` below had no reliable base.
            // powershellString is assumed to yield a quoted PowerShell string, as in the
            // PATH assignment above - TODO confirm against common/shellUtils.
            script.push(`cd ${powershellString(this.experimentRootDir)}`);
            script.push(`New-Item -ItemType "directory" -Path ${path.join(this.experimentRootDir, 'envs', environment.id)} -Force`);
            script.push(`cd envs\\${environment.id}`);
            environment.command = `python -m nni.tools.trial_tool.trial_runner`;
            script.push(
                `cmd.exe /c ${environment.command} --job_pid_file ${path.join(environment.runnerWorkingFolder, 'pid')} 2>&1 | Out-File "${path.join(environment.runnerWorkingFolder, 'trial_runner.log')}" -encoding utf8`,
                // Seconds since the Unix epoch, followed by the millisecond part of the current time.
                `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
                `$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`,
                `Write $LASTEXITCODE " " $NOW_DATE  | Out-File "${path.join(environment.runnerWorkingFolder, 'code')}" -NoNewline -encoding utf8`);
        } else {
            script.push(`cd ${this.experimentRootDir}`);
            script.push(`eval ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid 1>${environment.runnerWorkingFolder}/trialrunner_stdout 2>${environment.runnerWorkingFolder}/trialrunner_stderr`);
            if (process.platform === 'darwin') {
                // https://superuser.com/questions/599072/how-to-get-bash-execution-time-in-milliseconds-under-mac-os-x
                // Considering the worst case, write 999 to avoid negative duration
                script.push(`echo $? \`date +%s999\` >'${environment.runnerWorkingFolder}/code'`);
            } else {
                script.push(`echo $? \`date +%s%3N\` >'${environment.runnerWorkingFolder}/code'`);
            }
        }

        return script;
    }

    /**
     * Prepare the working folder of an environment and launch its run script.
     *
     * Steps: pick the experiment root (shared storage root when it is mounted locally),
     * populate `<root>/envs` (from shared storage or from the local `environment-temp`
     * folder), create the runner working folder, write the run script and execute it.
     * The script is started without being awaited.
     *
     * Side effects: may reassign `this.experimentRootDir`; sets
     * `environment.runnerWorkingFolder`, `environment.command` and `environment.trackingUrl`.
     *
     * @param environment environment to start
     */
    public async startEnvironment(environment: EnvironmentInformation): Promise<void> {
        // Need refactor, this temp folder path is not appropriate, there are two expId in this path
        const sharedStorageService = component.get<SharedStorageService>(SharedStorageService);
        if (environment.useSharedStorage && sharedStorageService.canLocalMounted) {
            this.experimentRootDir = sharedStorageService.localWorkingRoot;
        }
        const localEnvCodeFolder: string = path.join(this.experimentRootDir, "envs");
        if (environment.useSharedStorage && !sharedStorageService.canLocalMounted) {
            // Shared storage that cannot be mounted locally: pull the envs folder down.
            await sharedStorageService.storageService.copyDirectoryBack("envs", localEnvCodeFolder)
        } else if (!environment.useSharedStorage) {
            const localTempFolder: string = path.join(this.experimentRootDir, "environment-temp", "envs");
            await execCopydir(localTempFolder, localEnvCodeFolder);
        }
        environment.runnerWorkingFolder = path.join(localEnvCodeFolder, environment.id);
        await execMkdir(environment.runnerWorkingFolder);
        environment.command = this.getScript(environment).join(getNewLine());
        const scriptPath: string = path.join(localEnvCodeFolder, getScriptName('run'));
        await createScriptFile(scriptPath, environment.command);

        // Execute command in local machine
        runScript(scriptPath);
        environment.trackingUrl = `${environment.runnerWorkingFolder}`;
    }

    /**
     * Stop an environment by killing the process tree rooted at the pid recorded
     * in its `pid` file. Does nothing when the environment is already marked as not alive.
     *
     * The returned promise rejects if the `pid` file cannot be read.
     *
     * @param environment environment to stop
     */
    public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
        if (environment.isAlive === false) {
            return;
        }

        const jobpidPath: string = path.join(environment.runnerWorkingFolder, 'pid');
        const pid: string = await fs.promises.readFile(jobpidPath, 'utf8');
        tkill(Number(pid), 'SIGKILL');
    }
}