// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import { TrialJobApplicationForm, TrialJobDetail, TrialJobStatus } from 'common/trainingService';
import {TrialConfig} from '../common/trialConfig';

/**
 * Settings describing a PAI cluster: credentials, host, and optional
 * resource figures.
 */
export class PAIClusterConfig {
    public readonly userName: string;
    public readonly passWord?: string;
    // Intentionally not readonly: may be reassigned after construction.
    public host: string;
    public readonly token?: string;
    public readonly reuse?: boolean;

    public cpuNum?: number;
    public memoryMB?: number;
    public gpuNum?: number;

    // Not populated by the constructor; these stay undefined unless a
    // caller assigns them afterwards.
    public useActiveGpu?: boolean;
    public maxTrialNumPerGpu?: number;

    /**
     * Constructor
     * @param userName User name of PAI Cluster
     * @param host Host IP of PAI Cluster
     * @param passWord password of PAI Cluster
     * @param token PAI token of PAI Cluster
     * @param reuse If job is reusable for multiple trials
     * @param cpuNum Optional CPU count, stored unchanged
     * @param memoryMB Optional memory amount (in MB, going by the name), stored unchanged
     * @param gpuNum Optional GPU count, stored unchanged
     */
    constructor(userName: string, host: string, passWord?: string, token?: string, reuse?: boolean,
        cpuNum?: number, memoryMB?: number, gpuNum?: number) {
        this.userName = userName;
        this.passWord = passWord;
        this.host = host;
        this.token = token;
        this.reuse = reuse;
        this.cpuNum = cpuNum;
        this.memoryMB = memoryMB;
        this.gpuNum = gpuNum;
    }
}

/**
 * PAI trial job detail
 *
 * Record of a single trial job submitted to PAI. The constructor fills in
 * the identifying and bookkeeping fields; `startTime`, `endTime`, `url` and
 * `isEarlyStopped` are left undefined for later assignment.
 */
export class PAITrialJobDetail implements TrialJobDetail {
    public id: string;
    public status: TrialJobStatus;
    public paiJobName: string;
    public submitTime: number;
    public startTime?: number;
    public endTime?: number;
    public tags?: string[];
    public url?: string;
    public workingDirectory: string;
    public form: TrialJobApplicationForm;
    public logPath: string;
    public isEarlyStopped?: boolean;
    public paiJobDetailUrl?: string;

    /**
     * @param id Trial job id
     * @param status Initial job status
     * @param paiJobName Name of the job on PAI
     * @param submitTime Submission time (numeric timestamp; unit not fixed here)
     * @param workingDirectory Working directory of the trial
     * @param form Application form the trial was created from
     * @param logPath Path of the trial's log
     * @param paiJobDetailUrl Optional URL of the job's detail page on PAI
     */
    constructor(id: string, status: TrialJobStatus, paiJobName: string,
                submitTime: number, workingDirectory: string, form: TrialJobApplicationForm, logPath: string, paiJobDetailUrl?: string) {
        this.id = id;
        this.status = status;
        this.paiJobName = paiJobName;
        this.submitTime = submitTime;
        this.workingDirectory = workingDirectory;
        this.form = form;
        // Every instance starts with its own fresh, empty tag list.
        this.tags = [];
        this.logPath = logPath;
        this.paiJobDetailUrl = paiJobDetailUrl;
    }
}

/**
 * Single-line shell command template for launching a trial on PAI.
 *
 * Positional `{n}` placeholders are substituted by the caller (not shown here):
 *   {0} NNI_SYS_DIR        {1} NNI_OUTPUT_DIR     {2} NNI_TRIAL_JOB_ID
 *   {3} NNI_EXP_ID         {4} NNI_TRIAL_SEQ_ID   {5} MULTI_PHASE
 *   {6} NNI_CODE_DIR       {7} --trial_command    {8} --nnimanager_ip
 *   {9} --nnimanager_port  {10} --nni_manager_version  {11} --log_collection
 *
 * The segments below are joined with a single space, so the resulting
 * string contains no newline characters.
 */
export const PAI_TRIAL_COMMAND_FORMAT: string = [
    'export NNI_PLATFORM=pai NNI_SYS_DIR={0} NNI_OUTPUT_DIR={1} NNI_TRIAL_JOB_ID={2} NNI_EXP_ID={3} NNI_TRIAL_SEQ_ID={4} MULTI_PHASE={5}',
    '&& NNI_CODE_DIR={6} && mkdir -p $NNI_SYS_DIR/code && cp -r $NNI_CODE_DIR/. $NNI_SYS_DIR/code && sh $NNI_SYS_DIR/install_nni.sh',
    "&& cd $NNI_SYS_DIR/code && python3 -m nni.tools.trial_tool.trial_keeper --trial_command '{7}' --nnimanager_ip '{8}' --nnimanager_port '{9}'",
    "--nni_manager_version '{10}' --log_collection '{11}' | tee $NNI_OUTPUT_DIR/trial.log"
].join(' ');

/**
 * Trial configuration for PAI.
 *
 * Extends the base `TrialConfig` (which receives the command, code
 * directory and GPU count) with the extra fields stored below.
 */
export class NNIPAITrialConfig extends TrialConfig {
    public readonly cpuNum: number;
    public readonly memoryMB: number;
    public readonly image: string;
    // The only field here that is not readonly, so it can be changed after construction.
    public virtualCluster?: string;
    public readonly nniManagerNFSMountPath: string;
    public readonly containerNFSMountPath: string;
    public readonly paiStorageConfigName: string;
    public readonly paiConfigPath?: string;

    /**
     * @param command Forwarded to the `TrialConfig` constructor
     * @param codeDir Forwarded to the `TrialConfig` constructor
     * @param gpuNum Forwarded to the `TrialConfig` constructor
     * @param cpuNum Stored on `cpuNum`
     * @param memoryMB Stored on `memoryMB`
     * @param image Stored on `image`
     * @param nniManagerNFSMountPath Stored on `nniManagerNFSMountPath`
     * @param containerNFSMountPath Stored on `containerNFSMountPath`
     * @param paiStorageConfigName Stored on `paiStorageConfigName`
     * @param virtualCluster Optional; stored on `virtualCluster`
     * @param paiConfigPath Optional; stored on `paiConfigPath`
     */
    constructor(
        command: string,
        codeDir: string,
        gpuNum: number,
        cpuNum: number,
        memoryMB: number,
        image: string,
        nniManagerNFSMountPath: string,
        containerNFSMountPath: string,
        paiStorageConfigName: string,
        virtualCluster?: string,
        paiConfigPath?: string
    ) {
        super(command, codeDir, gpuNum);

        // Assignment order is kept stable so property enumeration order does not change.
        this.cpuNum = cpuNum;
        this.memoryMB = memoryMB;
        this.image = image;
        this.virtualCluster = virtualCluster;
        this.nniManagerNFSMountPath = nniManagerNFSMountPath;
        this.containerNFSMountPath = containerNFSMountPath;
        this.paiStorageConfigName = paiStorageConfigName;
        this.paiConfigPath = paiConfigPath;
    }
}