"...composable_kernel.git" did not exist on "675aa69e45381bf8b179f9faf7db8cf726c0e004"
Unverified Commit 800c675f authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Fix pai reuse mode (#4027)

parent cc5a4fc5
...@@ -16,7 +16,9 @@ _logger = logging.getLogger(__name__) ...@@ -16,7 +16,9 @@ _logger = logging.getLogger(__name__)
def to_v2(v1) -> ExperimentConfig: def to_v2(v1) -> ExperimentConfig:
v1 = copy.deepcopy(v1) v1 = copy.deepcopy(v1)
platform = v1.pop('trainingServicePlatform') platform = v1.pop('trainingServicePlatform')
assert platform in ['local', 'remote', 'openpai', 'aml'] assert platform in ['local', 'remote', 'pai', 'aml']
if platform == 'pai':
platform = 'openpai'
v2 = ExperimentConfig(platform) v2 = ExperimentConfig(platform)
_drop_field(v1, 'authorName') _drop_field(v1, 'authorName')
...@@ -88,7 +90,7 @@ def to_v2(v1) -> ExperimentConfig: ...@@ -88,7 +90,7 @@ def to_v2(v1) -> ExperimentConfig:
if 'memoryMB' in v1_trial: if 'memoryMB' in v1_trial:
ts.trial_memory_size = str(v1_trial.pop('memoryMB')) + 'mb' ts.trial_memory_size = str(v1_trial.pop('memoryMB')) + 'mb'
_move_field(v1_trial, ts, 'image', 'docker_image') _move_field(v1_trial, ts, 'image', 'docker_image')
_deprecate(v1_trial, v2, 'virtualCluster') _move_field(v1_trial, ts, 'virtualCluster', 'virtual_cluster')
_move_field(v1_trial, ts, 'paiStorageConfigName', 'storage_config_name') _move_field(v1_trial, ts, 'paiStorageConfigName', 'storage_config_name')
_move_field(v1_trial, ts, 'paiConfigPath', 'openpaiConfigFile') _move_field(v1_trial, ts, 'paiConfigPath', 'openpaiConfigFile')
......
...@@ -21,6 +21,7 @@ class OpenpaiConfig(TrainingServiceConfig): ...@@ -21,6 +21,7 @@ class OpenpaiConfig(TrainingServiceConfig):
trial_memory_size: str trial_memory_size: str
storage_config_name: str storage_config_name: str
docker_image: str = 'msranni/nni:latest' docker_image: str = 'msranni/nni:latest'
virtual_cluster: Optional[str]
local_storage_mount_point: PathLike local_storage_mount_point: PathLike
container_storage_mount_point: str container_storage_mount_point: str
reuse_mode: bool = True reuse_mode: bool = True
......
...@@ -58,6 +58,7 @@ export interface OpenpaiConfig extends TrainingServiceConfig { ...@@ -58,6 +58,7 @@ export interface OpenpaiConfig extends TrainingServiceConfig {
containerStorageMountPoint: string; containerStorageMountPoint: string;
reuseMode: boolean; reuseMode: boolean;
openpaiConfig?: object; openpaiConfig?: object;
virtualCluster?: string;
} }
/* AML */ /* AML */
...@@ -198,7 +199,7 @@ export function toSeconds(time: string): number { ...@@ -198,7 +199,7 @@ export function toSeconds(time: string): number {
throw new Error(`Bad time string "${time}"`); throw new Error(`Bad time string "${time}"`);
} }
const sizeUnits = { tb: 1024 * 1024, gb: 1024 * 1024, mb: 1, kb: 1 / 1024 }; const sizeUnits = { tb: 1024 * 1024, gb: 1024, mb: 1, kb: 1 / 1024 };
export function toMegaBytes(size: string): number { export function toMegaBytes(size: string): number {
for (const [unit, factor] of Object.entries(sizeUnits)) { for (const [unit, factor] of Object.entries(sizeUnits)) {
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
import * as yaml from 'js-yaml'; import * as yaml from 'js-yaml';
import * as request from 'request'; import * as request from 'request';
import { Container, Scope } from 'typescript-ioc';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import * as component from '../../../common/component'; import * as component from '../../../common/component';
import { ExperimentConfig, OpenpaiConfig, flattenConfig, toMegaBytes } from '../../../common/experimentConfig'; import { ExperimentConfig, OpenpaiConfig, flattenConfig, toMegaBytes } from '../../../common/experimentConfig';
...@@ -15,6 +16,7 @@ import { NNIPAITrialConfig } from '../../pai/paiConfig'; ...@@ -15,6 +16,7 @@ import { NNIPAITrialConfig } from '../../pai/paiConfig';
import { EnvironmentInformation, EnvironmentService } from '../environment'; import { EnvironmentInformation, EnvironmentService } from '../environment';
import { SharedStorageService } from '../sharedStorage'; import { SharedStorageService } from '../sharedStorage';
import { MountedStorageService } from '../storages/mountedStorageService'; import { MountedStorageService } from '../storages/mountedStorageService';
import { StorageService } from '../storageService';
interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { } interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { }
...@@ -38,9 +40,10 @@ export class OpenPaiEnvironmentService extends EnvironmentService { ...@@ -38,9 +40,10 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
this.config = flattenConfig(config, 'openpai'); this.config = flattenConfig(config, 'openpai');
this.paiToken = this.config.token; this.paiToken = this.config.token;
this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http'; this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http';
Container.bind(StorageService)
// FIXME: only support MountedStorageService .to(MountedStorageService)
const storageService = new MountedStorageService(); .scope(Scope.Singleton);
const storageService = component.get<StorageService>(StorageService)
const remoteRoot = storageService.joinPath(this.config.localStorageMountPoint, this.experimentId); const remoteRoot = storageService.joinPath(this.config.localStorageMountPoint, this.experimentId);
storageService.initialize(this.config.localStorageMountPoint, remoteRoot); storageService.initialize(this.config.localStorageMountPoint, remoteRoot);
} }
...@@ -286,7 +289,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService { ...@@ -286,7 +289,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
taskRetryCount: 0, taskRetryCount: 0,
dockerImage: 'docker_image_0', dockerImage: 'docker_image_0',
resourcePerInstance: { resourcePerInstance: {
gpu: this.config.trialGpuNumber, gpu: this.config.trialGpuNumber === undefined? 0: this.config.trialGpuNumber,
cpu: this.config.trialCpuNumber, cpu: this.config.trialCpuNumber,
memoryMB: toMegaBytes(this.config.trialMemorySize) memoryMB: toMegaBytes(this.config.trialMemorySize)
}, },
...@@ -304,9 +307,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService { ...@@ -304,9 +307,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
submitFrom: 'submit-job-v2' submitFrom: 'submit-job-v2'
} }
} }
if (this.config.deprecated && this.config.deprecated.virtualCluster) { if (this.config.virtualCluster) {
nniJobConfig.defaults = { nniJobConfig.defaults = {
virtualCluster: this.config.deprecated.virtualCluster virtualCluster: this.config.virtualCluster
} }
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment