// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
Deshui Yu's avatar
Deshui Yu committed
3
4
5

'use strict';

6
import { TrialJobApplicationForm, TrialJobDetail, TrialJobStatus } from '../../common/trainingService';
7
import { RemoteMachineConfig } from '../../common/experimentConfig';
8
import { GPUInfo, GPUSummary, ScheduleResultType } from '../common/gpuData';
9
import { ShellExecutor } from './shellExecutor';
Deshui Yu's avatar
Deshui Yu committed
10
11
12
13
14

/**
 * Metadata of a remote machine, used for configuration and status queries.
 */
export class RemoteMachineMeta {
    /** Configuration this metadata object was constructed from. */
    public readonly config: RemoteMachineConfig;

    /** GPU summary of the machine; the constructor leaves it undefined. */
    public gpuSummary: GPUSummary | undefined;

    /**
     * Bookkeeping of occupied GPUs; starts out empty.
     * NOTE(review): presumably maps a GPU index to the number of trials using it —
     * confirm against the scheduler that fills it.
     */
    public occupiedGpuIndexMap: Map<number, number>;

    /**
     * @param config configuration of the remote machine
     */
    constructor(config: RemoteMachineConfig) {
        this.config = config;
        this.occupiedGpuIndexMap = new Map<number, number>();
    }
}

Deshui Yu's avatar
Deshui Yu committed
25
26
27
28
/**
 * The execution result of a command executed on a remote machine.
 */
export class RemoteCommandResult {
    /** Text captured from the command's standard output. */
    public readonly stdout: string;
    /** Text captured from the command's standard error. */
    public readonly stderr: string;
    /** Exit code reported for the command. */
    public readonly exitCode: number;

    /**
     * @param stdout standard output of the command
     * @param stderr standard error of the command
     * @param exitCode exit code of the command
     */
    constructor(stdout: string, stderr: string, exitCode: number) {
        this.stdout = stdout;
        this.stderr = stderr;
        this.exitCode = exitCode;
    }
}

/**
 * Trial job detail record used by the remote machine training service.
 *
 * Implements TrialJobDetail and adds the remote-machine specific fields
 * `rmMeta` and `gpuIndices`.
 */
export class RemoteMachineTrialJobDetail implements TrialJobDetail {
    public id: string;
    public status: TrialJobStatus;
    public submitTime: number;
    public startTime?: number;
    public endTime?: number;
    /** Initialised to an empty array by the constructor. */
    public tags?: string[];
    public url?: string;
    public workingDirectory: string;
    public form: TrialJobApplicationForm;
    /** Metadata of the remote machine associated with this trial; not set by the constructor. */
    public rmMeta?: RemoteMachineMeta;
    public isEarlyStopped?: boolean;
    /**
     * GPUs associated with this trial; initialised to an empty array.
     * Despite the name, the elements are GPUInfo objects rather than bare indices.
     */
    public gpuIndices: GPUInfo[];

    /**
     * @param id trial job id
     * @param status initial status of the trial job
     * @param submitTime submit time of the trial job
     * @param workingDirectory working directory of the trial job
     * @param form application form the trial job was submitted with
     */
    constructor(id: string, status: TrialJobStatus, submitTime: number,
        workingDirectory: string, form: TrialJobApplicationForm) {
        this.id = id;
        this.status = status;
        this.submitTime = submitTime;
        this.workingDirectory = workingDirectory;
        this.form = form;
        this.tags = [];
        this.gpuIndices = [];
    }
}

SparkSnail's avatar
SparkSnail committed
69
/**
 * The remote machine executor manager.
 *
 * Keeps the ShellExecutor instances created for one remote machine and records
 * which executor is assigned to which id.
 */
export class ExecutorManager {
    /** Metadata of the machine that every executor of this manager is initialised with. */
    public readonly rmMeta: RemoteMachineMeta;

    /** Executor currently assigned to each id. */
    private readonly executorMap: Map<string, ShellExecutor> = new Map<string, ShellExecutor>();

    /** Every executor created by this manager since the last releaseAllExecutor(). */
    private executors: ShellExecutor[] = [];

    /**
     * @param config configuration of the remote machine; wrapped into a new RemoteMachineMeta
     */
    constructor(config: RemoteMachineConfig) {
        this.rmMeta = new RemoteMachineMeta(config);
    }

    /**
     * Get the executor assigned to `id`, assigning one first if necessary.
     *
     * An id that already has an executor gets the same one back. Otherwise the first
     * existing executor whose addUsage() succeeds is taken, and a new executor is
     * created when none accepts.
     *
     * @param id key the executor is assigned under
     * @returns the executor assigned to `id`
     */
    public async getExecutor(id: string): Promise<ShellExecutor> {
        // already assigned
        const assigned = this.executorMap.get(id);
        if (assigned !== undefined) {
            return assigned;
        }

        // addUsage() returning true is treated as having claimed that executor.
        let executor: ShellExecutor | undefined;
        for (const candidateExecutor of this.executors) {
            if (candidateExecutor.addUsage()) {
                executor = candidateExecutor;
                break;
            }
        }
        // init a new executor if no free one.
        if (executor === undefined) {
            executor = await this.createShellExecutor();
        }

        this.executorMap.set(id, executor);

        return executor;
    }

    /**
     * Close all executors and forget every id-to-executor assignment.
     */
    public releaseAllExecutor(): void {
        this.executorMap.clear();
        for (const executor of this.executors) {
            executor.close();
        }
        this.executors = [];
    }

    /**
     * Release the usage held on behalf of `id` and remove its assignment.
     * The executor itself is kept (not closed) so later requests can use it.
     *
     * @param id key the executor was requested with in getExecutor()
     * @throws Error when no executor is assigned to `id`
     */
    public releaseExecutor(id: string): void {
        const executor = this.executorMap.get(id);
        if (executor === undefined) {
            throw new Error(`executor for ${id} is not found`);
        }
        executor.releaseUsage();
        this.executorMap.delete(id);
    }

    /**
     * Create a new connection executor, initialize it with this manager's machine
     * metadata, claim one usage on it and add it to the executor list.
     *
     * @returns the newly created executor
     * @throws Error when the fresh executor refuses the first usage
     */
    private async createShellExecutor(): Promise<ShellExecutor> {
        const executor = new ShellExecutor();
        await executor.initialize(this.rmMeta);
        if (!executor.addUsage()) {
            throw new Error("failed to add usage on new created Executor! It's a wired bug!");
        }
        this.executors.push(executor);
        return executor;
    }
}
SparkSnail's avatar
SparkSnail committed
152

153
/**
 * Outcome of a scheduling attempt: the result type, plus the schedule info,
 * which may be undefined.
 */
export type RemoteMachineScheduleResult = { scheduleInfo: RemoteMachineScheduleInfo | undefined; resultType: ScheduleResultType };

/**
 * Schedule info: the selected remote machine and a CUDA visible-device string.
 * NOTE(review): cudaVisibleDevice is presumably the value exported as
 * CUDA_VISIBLE_DEVICES for the trial — confirm against the scheduler.
 */
export type RemoteMachineScheduleInfo = { rmMeta: RemoteMachineMeta; cudaVisibleDevice: string };