// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import { TrialJobApplicationForm, TrialJobDetail, TrialJobStatus } from '../../common/trainingService';
import {TrialConfig} from '../common/trialConfig';

export class PAIClusterConfig {
    public readonly userName: string;
    public readonly passWord?: string;
    public host: string;
    public readonly token?: string;
    public readonly reuse?: boolean;

    public cpuNum?: number;
    public memoryMB?: number;
    public gpuNum?: number;

    // These two are not populated by the constructor; they stay undefined
    // until some other code assigns them.
    public useActiveGpu?: boolean;
    public maxTrialNumPerGpu?: number;

    /**
     * Constructor
     * @param userName User name of PAI Cluster
     * @param host Host IP of PAI Cluster
     * @param passWord password of PAI Cluster
     * @param token PAI token of PAI Cluster
     * @param reuse If job is reusable for multiple trials
     * @param cpuNum Number of CPUs (NOTE(review): presumably the per-job resource request; confirm with callers)
     * @param memoryMB Amount of memory in MB (NOTE(review): presumably the per-job resource request; confirm with callers)
     * @param gpuNum Number of GPUs (NOTE(review): presumably the per-job resource request; confirm with callers)
     */
    constructor(userName: string, host: string, passWord?: string, token?: string, reuse?: boolean,
        cpuNum?: number, memoryMB?: number, gpuNum?: number) {
        // Assignment order is kept as-is so the instance's own-property order
        // (e.g. when serialized) does not change.
        this.userName = userName;
        this.passWord = passWord;
        this.host = host;
        this.token = token;
        this.reuse = reuse;
        this.cpuNum = cpuNum;
        this.memoryMB = memoryMB;
        this.gpuNum = gpuNum;
    }
}

44
/**
 * PAI trial job detail
 *
 * Record of a single trial job, implementing the shared `TrialJobDetail`
 * contract and adding the PAI job name and an optional PAI job detail URL.
 */
export class PAITrialJobDetail implements TrialJobDetail {
    public id: string;
    public status: TrialJobStatus;
    public paiJobName: string;
    public submitTime: number;
    // startTime, endTime, url and isEarlyStopped are not set by the
    // constructor; they stay undefined until assigned elsewhere.
    public startTime?: number;
    public endTime?: number;
    public tags?: string[];
    public url?: string;
    public workingDirectory: string;
    public form: TrialJobApplicationForm;
    public logPath: string;
    public isEarlyStopped?: boolean;
    public paiJobDetailUrl?: string;

    /**
     * Constructor
     * @param id Trial job id
     * @param status Initial status of the trial job
     * @param paiJobName Name of the job on PAI
     * @param submitTime Submission time (NOTE(review): presumably an epoch timestamp; confirm unit with callers)
     * @param workingDirectory Working directory of the trial job
     * @param form Application form the trial job was created from
     * @param logPath Path of the trial job's log
     * @param paiJobDetailUrl Optional URL of the PAI job detail page
     */
    constructor(id: string, status: TrialJobStatus, paiJobName: string,
                submitTime: number, workingDirectory: string, form: TrialJobApplicationForm, logPath: string, paiJobDetailUrl?: string) {
        this.id = id;
        this.status = status;
        this.paiJobName = paiJobName;
        this.submitTime = submitTime;
        this.workingDirectory = workingDirectory;
        this.form = form;
        // Every instance starts with its own empty tag list.
        this.tags = [];
        this.logPath = logPath;
        this.paiJobDetailUrl = paiJobDetailUrl;
    }
}
SparkSnail's avatar
SparkSnail committed
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108

/**
 * Single-line shell command template for running a trial on PAI.
 *
 * The command exports the NNI environment variables, sets NNI_CODE_DIR,
 * copies the code directory into `$NNI_SYS_DIR/code`, runs `install_nni.sh`,
 * then starts `nni.tools.trial_tool.trial_keeper` from the copied code
 * directory, piping its output through `tee` into `$NNI_OUTPUT_DIR/trial.log`.
 *
 * `{0}` .. `{11}` are positional placeholders.
 * NOTE(review): presumably filled in by a string-format helper at the call site; confirm.
 */
export const PAI_TRIAL_COMMAND_FORMAT: string = [
    // Environment for the trial process.
    "export NNI_PLATFORM=pai NNI_SYS_DIR={0} NNI_OUTPUT_DIR={1} NNI_TRIAL_JOB_ID={2} NNI_EXP_ID={3} NNI_TRIAL_SEQ_ID={4} MULTI_PHASE={5}",
    // Stage the code directory and install NNI.
    "&& NNI_CODE_DIR={6} && mkdir -p $NNI_SYS_DIR/code && cp -r $NNI_CODE_DIR/. $NNI_SYS_DIR/code && sh $NNI_SYS_DIR/install_nni.sh",
    // Launch the trial keeper from the staged code directory.
    "&& cd $NNI_SYS_DIR/code && python3 -m nni.tools.trial_tool.trial_keeper --trial_command '{7}' --nnimanager_ip '{8}' --nnimanager_port '{9}'",
    "--nni_manager_version '{10}' --log_collection '{11}' | tee $NNI_OUTPUT_DIR/trial.log"
].join(' ');

/**
 * PAI trial configuration
 *
 * Adds PAI-specific settings on top of the base `TrialConfig`, which
 * receives `command`, `codeDir` and `gpuNum`.
 */
export class NNIPAITrialConfig extends TrialConfig {
    public readonly cpuNum: number;
    public readonly memoryMB: number;
    public readonly image: string;
    // The only PAI-specific field that is not readonly.
    public virtualCluster?: string;
    public readonly nniManagerNFSMountPath: string;
    public readonly containerNFSMountPath: string;
    public readonly paiStorageConfigName: string;
    public readonly paiConfigPath?: string;

    /**
     * Constructor
     * @param command Trial command, forwarded to `TrialConfig`
     * @param codeDir Code directory, forwarded to `TrialConfig`
     * @param gpuNum Number of GPUs, forwarded to `TrialConfig`
     * @param cpuNum Number of CPUs
     * @param memoryMB Amount of memory in MB
     * @param image Image name (NOTE(review): presumably the container image for the trial; confirm)
     * @param nniManagerNFSMountPath NFS mount path (NOTE(review): presumably on the NNI manager side; confirm)
     * @param containerNFSMountPath NFS mount path (NOTE(review): presumably inside the trial container; confirm)
     * @param paiStorageConfigName Name of the PAI storage config
     * @param virtualCluster Optional virtual cluster name
     * @param paiConfigPath Optional path to a PAI config file
     */
    constructor(
        command: string,
        codeDir: string,
        gpuNum: number,
        cpuNum: number,
        memoryMB: number,
        image: string,
        nniManagerNFSMountPath: string,
        containerNFSMountPath: string,
        paiStorageConfigName: string,
        virtualCluster?: string,
        paiConfigPath?: string
    ) {
        super(command, codeDir, gpuNum);

        // Compute resources and image.
        this.cpuNum = cpuNum;
        this.memoryMB = memoryMB;
        this.image = image;
        this.virtualCluster = virtualCluster;

        // Storage / mount settings.
        this.nniManagerNFSMountPath = nniManagerNFSMountPath;
        this.containerNFSMountPath = containerNFSMountPath;
        this.paiStorageConfigName = paiStorageConfigName;
        this.paiConfigPath = paiConfigPath;
    }
}