// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

import assert from 'assert';
import { MethodNotImplementedError, NNIError, NNIErrorNames } from 'common/errors';
import { getLogger, Logger } from 'common/log';
import { TrialJobStatus } from 'common/trainingService';
import { KubernetesCRDClient } from './kubernetesApiClient';
import { KubernetesTrialJobDetail } from './kubernetesData';

/**
 * Base collector that gathers job info (e.g. for Kubeflow jobs) from the Kubernetes cluster
 * for every trial job held in a shared map.
 *
 * The per-job lookup, `retrieveSingleTrialJobInfo`, throws `MethodNotImplementedError` in this
 * class, so a subclass has to override it before `retrieveTrialStatus` can succeed on a
 * non-empty map.
 */
export class KubernetesJobInfoCollector {
    /** Trial jobs to poll, keyed by trial job id. The caller's map is stored by reference, not copied. */
    protected readonly trialJobsMap: Map<string, KubernetesTrialJobDetail>;
    protected readonly log: Logger = getLogger('KubernetesJobInfoCollector');
    /**
     * Initialised to `RUNNING` and `WAITING`. Not read anywhere in this class;
     * presumably consulted by subclasses when deciding which jobs to refresh (verify against overrides).
     */
    protected readonly statusesNeedToCheck: TrialJobStatus[];

    /**
     * @param jobMap Map from trial job id to its job detail; kept by reference.
     */
    constructor(jobMap: Map<string, KubernetesTrialJobDetail>) {
        this.trialJobsMap = jobMap;
        this.statusesNeedToCheck = ['RUNNING', 'WAITING'];
    }

    /**
     * Starts one `retrieveSingleTrialJobInfo` call per entry in the trial job map and waits for all of them.
     *
     * @param kubernetesCRDClient Client handed to every per-job lookup; must not be `undefined`.
     * @returns A promise that resolves once every per-job lookup has resolved.
     * @throws The returned promise rejects with an assertion error when `kubernetesCRDClient` is `undefined`,
     *         with an `NNIError` (`NOT_FOUND`) when a map entry holds `undefined`, or with the first
     *         rejection raised by a per-job lookup.
     */
    public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined): Promise<void[]> {
        assert(kubernetesCRDClient !== undefined);
        const updateKubernetesTrialJobs: Promise<void>[] = [];
        for (const [trialJobId, kubernetesTrialJob] of this.trialJobsMap) {
            // Defensive guard: the declared value type excludes undefined, but that is not enforced at runtime.
            if (kubernetesTrialJob === undefined) {
                throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
            }
            // Collect the promises without awaiting so the lookups are all started before any is awaited.
            updateKubernetesTrialJobs.push(this.retrieveSingleTrialJobInfo(kubernetesCRDClient, kubernetesTrialJob));
        }

        return Promise.all(updateKubernetesTrialJobs);
    }

    /**
     * Per-job lookup hook. This base implementation always fails; subclasses are expected to override it.
     *
     * @param _kubernetesCRDClient Client for the lookup (unused here).
     * @param _kubernetesTrialJob Trial job whose info should be retrieved (unused here).
     * @throws The returned promise always rejects with `MethodNotImplementedError`.
     */
    protected async retrieveSingleTrialJobInfo(_kubernetesCRDClient: KubernetesCRDClient | undefined,
                                               _kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
        throw new MethodNotImplementedError();
    }
}