environment.ts 3.67 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import { GPUSummary } from "training_service/common/gpuData";
import { getLogger, Logger } from "../../common/log";
import { TrialJobStatus } from "../../common/trainingService";
import { EventEmitter } from "events";
import { WebCommandChannel } from "./channels/webCommandChannel";
import { CommandChannel } from "./commandChannel";


/**
 * Status values an environment can carry.
 */
export type EnvironmentStatus =
    | 'UNKNOWN'
    | 'WAITING'
    | 'RUNNING'
    | 'SUCCEEDED'
    | 'FAILED'
    | 'USER_CANCELED';

/**
 * Identifiers of the command channels a runner can be configured with.
 * NOTE(review): "ut" presumably stands for unit test - confirm.
 */
export type Channel =
    | "web"
    | "file"
    | "aml"
    | "ut";

/**
 * State record for a single environment: its identifiers on NNI and on the
 * training platform, its liveness flags and status, and per-node / GPU data.
 */
export class EnvironmentInformation {
    private log: Logger;

    /** NNI environment ID. */
    public id: string;
    /** Unique job ID on the training platform. */
    public jobId: string;
    /** Friendly job name on the training platform, in case it differs from the job ID. */
    public jobName: string;

    // ---- key states ----

    /** True when the environment is ready to run a trial. */
    public isIdle = false;
    /** True while the environment is running, waiting, or unknown. */
    public isAlive = true;
    /**
     * Current status. To record a final state, call `setFinalStatus` rather
     * than assigning this field directly.
     */
    public status: EnvironmentStatus = "UNKNOWN";

    public trackingUrl = "";
    public workingFolder = "";
    public runnerWorkingFolder = "";
    public command = "";
    public nodeCount = 1;

    /** Per-node records, used to aggregate node status for a multi-node trial. */
    public nodes: Map<string, NodeInfomation>;
    public gpuSummary = new Map<string, GPUSummary>();

    /**
     * @param id NNI environment ID.
     * @param jobName Friendly job name on the training platform.
     * @param jobId Job ID on the training platform; when omitted or empty,
     *              `jobName` is used as the job ID as well.
     */
    constructor(id: string, jobName: string, jobId?: string) {
        this.log = getLogger();
        this.id = id;
        this.jobName = jobName;
        // A missing (or empty) job ID falls back to the job name.
        this.jobId = jobId || jobName;
        this.nodes = new Map<string, NodeInfomation>();
    }

    /**
     * Stores `status` on this environment if it is one of 'WAITING',
     * 'SUCCEEDED', 'FAILED' or 'USER_CANCELED'. Any other value is rejected:
     * an error is logged and the current status is left untouched.
     *
     * NOTE(review): 'WAITING' is accepted here even though it does not read
     * like a final state - confirm this is intended.
     *
     * @param status The status to record.
     */
    public setFinalStatus(status: EnvironmentStatus): void {
        const isAccepted =
            status === 'WAITING' ||
            status === 'SUCCEEDED' ||
            status === 'FAILED' ||
            status === 'USER_CANCELED';

        if (!isAccepted) {
            this.log.error(`Environment: job ${this.jobId} set an invalid final state ${status}.`);
            return;
        }

        this.status = status;
    }
}
SparkSnail's avatar
SparkSnail committed
67

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/**
 * Base class for environment services. Subclasses supply the configuration and
 * lifecycle operations; this class provides default factories for the command
 * channel and for environment records.
 */
export abstract class EnvironmentService {

    /** Implemented by subclasses; presumably reports whether the service has a storage service - confirm against implementations. */
    public abstract get hasStorageService(): boolean;

    /** Receives one configuration entry as a key/value pair of strings. */
    public abstract config(key: string, value: string): Promise<void>;

    /** Implemented by subclasses to refresh the status of the given environments. */
    public abstract refreshEnvironmentsStatus(environments: EnvironmentInformation[]): Promise<void>;

    /** Implemented by subclasses to start the given environment. */
    public abstract startEnvironment(environment: EnvironmentInformation): Promise<void>;

    /** Implemented by subclasses to stop the given environment. */
    public abstract stopEnvironment(environment: EnvironmentInformation): Promise<void>;

    /**
     * Builds the command channel for this service. The default implementation
     * returns a `WebCommandChannel` constructed with `commandEmitter`.
     *
     * @param commandEmitter Event emitter handed to the channel.
     * @returns A new command channel instance.
     */
    public getCommandChannel(commandEmitter: EventEmitter): CommandChannel {
        const channel: CommandChannel = new WebCommandChannel(commandEmitter);
        return channel;
    }

    /**
     * Builds the record for a new environment. The default implementation
     * returns a plain `EnvironmentInformation` created from the two arguments.
     *
     * The method name is misspelled; it is kept as-is because callers and
     * subclasses refer to it by this name.
     *
     * @param envId NNI environment ID.
     * @param envName Name passed as the job name of the new record.
     * @returns A new `EnvironmentInformation`.
     */
    public createEnviornmentInfomation(envId: string, envName: string): EnvironmentInformation {
        const environment = new EnvironmentInformation(envId, envName);
        return environment;
    }
}

/**
 * Record for one node of an environment: its ID, its trial job status and,
 * optionally, an end time.
 */
export class NodeInfomation {
    /** Node identifier, as supplied to the constructor. */
    public id: string;
    /** Status of the node; "UNKNOWN" until something else is assigned. */
    public status: TrialJobStatus = "UNKNOWN";
    /**
     * Left unset by the constructor.
     * NOTE(review): presumably a timestamp; the unit is not visible here - confirm.
     */
    public endTime?: number;

    /**
     * @param id Identifier to store on the new record.
     */
    constructor(id: string) {
        this.id = id;
    }
}

/**
 * Settings object for a runner. Every field has a default, so a fresh instance
 * can be created without arguments and filled in afterwards.
 */
export class RunnerSettings {
    public experimentId = "";
    public platform = "";
    public nniManagerIP = "";
    /** Port of the NNI manager; defaults to 8081. */
    public nniManagerPort = 8081;
    public nniManagerVersion = "";
    /** Defaults to "none". */
    public logCollection = "none";
    public command = "";
    /** Defaults to false. */
    public enableGpuCollector = false;

    /**
     * Communication channel used by the runner. Must be one of the `Channel`
     * values ("web", "file", "aml", "ut"); defaults to "file".
     */
    public commandChannel: Channel = "file";
}