amlEnvironmentService.ts 5.6 KB
Newer Older
SparkSnail's avatar
SparkSnail committed
1
2
3
4
5
6
7
8
9
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import * as fs from 'fs';
import * as path from 'path';
import * as component from '../../../common/component';
import { getLogger, Logger } from '../../../common/log';
liuzhe-lz's avatar
liuzhe-lz committed
10
import { ExperimentConfig, AmlConfig, flattenConfig } from '../../../common/experimentConfig';
SparkSnail's avatar
SparkSnail committed
11
import { validateCodeDir } from '../../common/util';
12
import { AMLClient } from '../aml/amlClient';
liuzhe-lz's avatar
liuzhe-lz committed
13
import { AMLEnvironmentInformation } from '../aml/amlConfig';
14
import { EnvironmentInformation, EnvironmentService } from '../environment';
15
16
import { EventEmitter } from "events";
import { AMLCommandChannel } from '../channels/amlCommandChannel';
17
import { SharedStorageService } from '../sharedStorage'
SparkSnail's avatar
SparkSnail committed
18

liuzhe-lz's avatar
liuzhe-lz committed
19
// Single shape combining the general experiment config with the AML-specific
// fields, so both sets of properties can be read from one object.
interface FlattenAmlConfig extends ExperimentConfig, AmlConfig { }
SparkSnail's avatar
SparkSnail committed
20
21

/**
 * Environment service backed by AML jobs: submits an environment as an AML run,
 * collects the run status from the AML cluster and mirrors it onto the local
 * environment objects.
 */
@component.Singleton
export class AMLEnvironmentService extends EnvironmentService {

    private readonly log: Logger = getLogger('AMLEnvironmentService');
    private readonly experimentId: string;
    private readonly experimentRootDir: string;
    private readonly config: FlattenAmlConfig;

    /**
     * @param experimentRootDir local root directory of the experiment; the temporary
     *                          script folder is created underneath it
     * @param experimentId      experiment id handed to the AML client on submit
     * @param config            experiment config; flattened with the 'aml' section
     */
    constructor(experimentRootDir: string, experimentId: string, config: ExperimentConfig) {
        super();
        this.experimentId = experimentId;
        this.experimentRootDir = experimentRootDir;
        this.config = flattenConfig(config, 'aml');
        validateCodeDir(this.config.trialCodeDirectory);
    }

    /** This service always reports that it has no storage service. */
    public get hasStorageService(): boolean {
        return false;
    }

    /** Creates the AML command channel on top of the given event emitter. */
    public initCommandChannel(eventEmitter: EventEmitter): void {
        this.commandChannel = new AMLCommandChannel(eventEmitter);
    }

    /** Creates the AML-specific environment information object. */
    public createEnvironmentInformation(envId: string, envName: string): EnvironmentInformation {
        return new AMLEnvironmentInformation(envId, envName);
    }

    /** Name of this environment service: always 'aml'. */
    public get getName(): string {
        return 'aml';
    }

    /**
     * Refreshes the status of every given environment from its AML client.
     *
     * All refreshes run concurrently and the returned promise resolves only after
     * every one of them has finished. A failure while refreshing one environment
     * (missing client, error from the AML client) is logged and does not prevent
     * the remaining environments from being refreshed; it is not propagated to the
     * caller, so this method does not reject because of a single environment.
     *
     * @param environments environments whose status should be updated in place
     */
    public async refreshEnvironmentsStatus(environments: EnvironmentInformation[]): Promise<void> {
        // Array.forEach ignores the promises returned by an async callback, so the
        // refreshes are mapped to promises and awaited together instead.
        await Promise.all(environments.map(async (environment) => {
            try {
                await this.refreshSingleAmlEnvironment(environment);
            } catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                this.log.error(`AML: failed to refresh status of environment ${environment.envId}: ${reason}`);
            }
        }));
    }

    /**
     * Queries the AML client of one environment and maps the reported status onto
     * the environment status.
     *
     * @throws Error when the environment has no AML client yet
     */
    private async refreshSingleAmlEnvironment(environment: EnvironmentInformation): Promise<void> {
        const amlClient = (environment as AMLEnvironmentInformation).amlClient;
        if (!amlClient) {
            throw new Error('AML client not initialized!');
        }
        const newStatus = await amlClient.updateStatus(environment.status);
        switch (newStatus.toUpperCase()) {
            case 'WAITING':
            case 'QUEUED':
                environment.setStatus('WAITING');
                break;
            case 'RUNNING':
                environment.setStatus('RUNNING');
                break;
            case 'COMPLETED':
            case 'SUCCEEDED':
                environment.setStatus('SUCCEEDED');
                break;
            case 'FAILED':
                // The failure is carried by the environment status; the message is
                // logged rather than rejected so it cannot end up unhandled.
                environment.setStatus('FAILED');
                this.log.error(`AML: job ${environment.envId} is failed!`);
                break;
            case 'STOPPED':
            case 'STOPPING':
                environment.setStatus('USER_CANCELED');
                break;
            default:
                environment.setStatus('UNKNOWN');
        }
    }

    /**
     * Wraps the environment command into a Python launcher script, writes it to the
     * experiment's temporary folder and submits it through a new AML client.
     *
     * On success the environment receives the submitted run id, its tracking URL
     * and the AML client used for later status refreshes and stopping.
     *
     * @param environment environment to start; its `command` is rewritten in place
     */
    public async startEnvironment(environment: EnvironmentInformation): Promise<void> {
        const amlEnvironment: AMLEnvironmentInformation = environment as AMLEnvironmentInformation;
        const environmentLocalTempFolder = path.join(this.experimentRootDir, "environment-temp");
        // With `recursive: true` mkdir succeeds when the folder already exists, so
        // no separate (synchronous) existence check is needed.
        await fs.promises.mkdir(environmentLocalTempFolder, {recursive: true});

        if (amlEnvironment.useSharedStorage) {
            const sharedStorage = component.get<SharedStorageService>(SharedStorageService);
            const environmentRoot = sharedStorage.remoteWorkingRoot;
            const remoteMountCommand = sharedStorage.remoteMountCommand;
            amlEnvironment.command = `${remoteMountCommand} && cd ${environmentRoot} && ${amlEnvironment.command}`.replace(/"/g, `\\"`);
        } else {
            amlEnvironment.command = `mv envs outputs/envs && cd outputs && ${amlEnvironment.command}`;
        }
        // NOTE(review): the command is embedded in a single-quoted Python string
        // literal; a command containing a single quote would break the generated
        // script — confirm commands can never contain one.
        amlEnvironment.command = `import os\nos.system('${amlEnvironment.command}')`;

        if (this.config.deprecated && this.config.deprecated.useActiveGpu !== undefined) {
            amlEnvironment.useActiveGpu = this.config.deprecated.useActiveGpu;
        }
        amlEnvironment.maxTrialNumberPerGpu = this.config.maxTrialNumberPerGpu;

        await fs.promises.writeFile(path.join(environmentLocalTempFolder, 'nni_script.py'), amlEnvironment.command, { encoding: 'utf8' });
        const amlClient = new AMLClient(
            this.config.subscriptionId,
            this.config.resourceGroup,
            this.config.workspaceName,
            this.experimentId,
            this.config.computeTarget,
            this.config.dockerImage,
            'nni_script.py',
            environmentLocalTempFolder
        );
        amlEnvironment.id = await amlClient.submit();
        this.log.debug('aml: before getTrackingUrl');
        amlEnvironment.trackingUrl = await amlClient.getTrackingUrl();
        this.log.debug('aml: after getTrackingUrl');
        amlEnvironment.amlClient = amlClient;
    }

    /**
     * Stops the AML run behind the given environment and waits for the stop request
     * to finish, so that a failure surfaces to the caller instead of being dropped.
     *
     * @throws Error when the environment has no AML client
     */
    public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
        const amlEnvironment: AMLEnvironmentInformation = environment as AMLEnvironmentInformation;
        const amlClient = amlEnvironment.amlClient;
        if (!amlClient) {
            throw new Error('AML client not initialized!');
        }
        await amlClient.stop();
    }
}