// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import * as assert from 'assert';
import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
import { getLogger, Logger } from '../../common/log';
import { TrialJobStatus } from '../../common/trainingService';
import { KubernetesCRDClient } from './kubernetesApiClient';
import { KubernetesTrialJobDetail } from './kubernetesData';

/**
 * Collects information about Kubeflow jobs from a Kubernetes cluster and
 * updates the locally tracked job status.
 *
 * This class only iterates over the tracked trial jobs; the per-job query in
 * `retrieveSingleTrialJobInfo` throws `MethodNotImplementedError` here, so a
 * subclass has to override it before `retrieveTrialStatus` can succeed on a
 * non-empty job map.
 */
export class KubernetesJobInfoCollector {
    /**
     * Trial jobs keyed by trial job id. The map handed to the constructor is
     * stored by reference, not copied.
     */
    protected readonly trialJobsMap: Map<string, KubernetesTrialJobDetail>;
    protected readonly log: Logger = getLogger();
    /**
     * Initialised to `['RUNNING', 'WAITING']`. It is not read inside this
     * class -- presumably subclasses use it to decide which jobs to poll
     * (NOTE(review): confirm against the subclasses).
     */
    protected readonly statusesNeedToCheck: TrialJobStatus[];

    /**
     * @param jobMap map from trial job id to trial job detail that this
     *               collector iterates over
     */
    constructor(jobMap: Map<string, KubernetesTrialJobDetail>) {
        this.trialJobsMap = jobMap;
        this.statusesNeedToCheck = ['RUNNING', 'WAITING'];
    }

    /**
     * Starts one `retrieveSingleTrialJobInfo` call per entry in the job map
     * and waits for all of them.
     *
     * @param kubernetesCRDClient client passed through to every per-job call;
     *                            must not be `undefined`
     * @returns a promise that resolves once every per-job call has resolved
     * @throws an assertion error if `kubernetesCRDClient` is `undefined`
     * @throws NNIError (`NOT_FOUND`) if a map entry holds an `undefined` value
     *
     * Because the method is `async`, both failures reach the caller as a
     * rejected promise rather than a synchronous throw.
     */
    public async retrieveTrialStatus(kubernetesCRDClient: KubernetesCRDClient | undefined): Promise<void[]> {
        assert(kubernetesCRDClient !== undefined);
        const updateKubernetesTrialJobs: Promise<void>[] = [];
        for (const [trialJobId, kubernetesTrialJob] of this.trialJobsMap) {
            // Defensive: the declared value type excludes undefined, but the
            // map is supplied by the caller, so the runtime check is kept.
            if (kubernetesTrialJob === undefined) {
                throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
            }
            // Not awaited here: the calls are started one after another and
            // awaited together below.
            updateKubernetesTrialJobs.push(this.retrieveSingleTrialJobInfo(kubernetesCRDClient, kubernetesTrialJob));
        }

        return Promise.all(updateKubernetesTrialJobs);
    }

    /**
     * Per-job hook called by `retrieveTrialStatus`. This implementation always
     * throws; subclasses are expected to override it.
     *
     * @param _kubernetesCRDClient client passed in by `retrieveTrialStatus` (unused here)
     * @param _kubernetesTrialJob trial job detail for the job to query (unused here)
     * @throws MethodNotImplementedError always (as a rejected promise, since the method is `async`)
     */
    protected async retrieveSingleTrialJobInfo(_kubernetesCRDClient: KubernetesCRDClient | undefined,
                                               _kubernetesTrialJob: KubernetesTrialJobDetail): Promise<void> {
        throw new MethodNotImplementedError();
    }
}