Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
d5857823
Unverified
Commit
d5857823
authored
Dec 20, 2021
by
liuzhe-lz
Committed by
GitHub
Dec 20, 2021
Browse files
Config refactor (#4370)
parent
cb090e8c
Changes
70
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
50 additions
and
85 deletions
+50
-85
ts/nni_manager/training_service/reusable/aml/amlConfig.ts
ts/nni_manager/training_service/reusable/aml/amlConfig.ts
+1
-3
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
...ng_service/reusable/environments/amlEnvironmentService.ts
+4
-9
ts/nni_manager/training_service/reusable/environments/dlcEnvironmentService.ts
...ng_service/reusable/environments/dlcEnvironmentService.ts
+4
-6
ts/nni_manager/training_service/reusable/environments/environmentServiceFactory.ts
...ervice/reusable/environments/environmentServiceFactory.ts
+10
-9
ts/nni_manager/training_service/reusable/environments/kubernetes/frameworkcontrollerEnvironmentService.ts
...ments/kubernetes/frameworkcontrollerEnvironmentService.ts
+10
-28
ts/nni_manager/training_service/reusable/environments/kubernetes/kubeflowEnvironmentService.ts
...ble/environments/kubernetes/kubeflowEnvironmentService.ts
+10
-15
ts/nni_manager/training_service/reusable/environments/kubernetes/kubernetesEnvironmentService.ts
...e/environments/kubernetes/kubernetesEnvironmentService.ts
+1
-1
ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts
...ervice/reusable/environments/openPaiEnvironmentService.ts
+4
-6
ts/nni_manager/training_service/reusable/environments/remoteEnvironmentService.ts
...service/reusable/environments/remoteEnvironmentService.ts
+4
-6
ts/nni_manager/training_service/reusable/routerTrainingService.ts
...anager/training_service/reusable/routerTrainingService.ts
+2
-2
No files found.
ts/nni_manager/training_service/reusable/aml/amlConfig.ts
View file @
d5857823
...
...
@@ -10,16 +10,14 @@ export class AMLClusterConfig {
public
readonly
resourceGroup
:
string
;
public
readonly
workspaceName
:
string
;
public
readonly
computeTarget
:
string
;
public
useActiveGpu
?:
boolean
;
public
maxTrialNumPerGpu
?:
number
;
constructor
(
subscriptionId
:
string
,
resourceGroup
:
string
,
workspaceName
:
string
,
computeTarget
:
string
,
useActiveGpu
?:
boolean
,
maxTrialNumPerGpu
?:
number
)
{
maxTrialNumPerGpu
?:
number
)
{
this
.
subscriptionId
=
subscriptionId
;
this
.
resourceGroup
=
resourceGroup
;
this
.
workspaceName
=
workspaceName
;
this
.
computeTarget
=
computeTarget
;
this
.
useActiveGpu
=
useActiveGpu
;
this
.
maxTrialNumPerGpu
=
maxTrialNumPerGpu
;
}
}
...
...
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
View file @
d5857823
...
...
@@ -5,7 +5,7 @@ import fs from 'fs';
import
path
from
'
path
'
;
import
*
as
component
from
'
common/component
'
;
import
{
getLogger
,
Logger
}
from
'
common/log
'
;
import
{
ExperimentConfig
,
AmlConfig
,
flatten
Config
}
from
'
common/experimentConfig
'
;
import
{
Aml
Config
}
from
'
common/experimentConfig
'
;
import
{
ExperimentStartupInfo
}
from
'
common/experimentStartupInfo
'
;
import
{
validateCodeDir
}
from
'
training_service/common/util
'
;
import
{
AMLClient
}
from
'
../aml/amlClient
'
;
...
...
@@ -15,8 +15,6 @@ import { EventEmitter } from "events";
import
{
AMLCommandChannel
}
from
'
../channels/amlCommandChannel
'
;
import
{
SharedStorageService
}
from
'
../sharedStorage
'
interface
FlattenAmlConfig
extends
ExperimentConfig
,
AmlConfig
{
}
/**
* Collector AML jobs info from AML cluster, and update aml job status locally
*/
...
...
@@ -26,13 +24,13 @@ export class AMLEnvironmentService extends EnvironmentService {
private
readonly
log
:
Logger
=
getLogger
(
'
AMLEnvironmentService
'
);
private
experimentId
:
string
;
private
experimentRootDir
:
string
;
private
config
:
Flatten
AmlConfig
;
private
config
:
AmlConfig
;
constructor
(
config
:
Experiment
Config
,
info
:
ExperimentStartupInfo
)
{
constructor
(
config
:
Aml
Config
,
info
:
ExperimentStartupInfo
)
{
super
();
this
.
experimentId
=
info
.
experimentId
;
this
.
experimentRootDir
=
info
.
logDir
;
this
.
config
=
flattenConfig
(
config
,
'
aml
'
)
;
this
.
config
=
config
;
validateCodeDir
(
this
.
config
.
trialCodeDirectory
);
}
...
...
@@ -98,9 +96,6 @@ export class AMLEnvironmentService extends EnvironmentService {
amlEnvironment
.
command
=
`mv envs outputs/envs && cd outputs &&
${
amlEnvironment
.
command
}
`
;
}
amlEnvironment
.
command
=
`import os\nos.system('
${
amlEnvironment
.
command
}
')`
;
if
(
this
.
config
.
deprecated
&&
this
.
config
.
deprecated
.
useActiveGpu
!==
undefined
)
{
amlEnvironment
.
useActiveGpu
=
this
.
config
.
deprecated
.
useActiveGpu
;
}
amlEnvironment
.
maxTrialNumberPerGpu
=
this
.
config
.
maxTrialNumberPerGpu
;
await
fs
.
promises
.
writeFile
(
path
.
join
(
environmentLocalTempFolder
,
'
nni_script.py
'
),
amlEnvironment
.
command
,
{
encoding
:
'
utf8
'
});
...
...
ts/nni_manager/training_service/reusable/environments/dlcEnvironmentService.ts
View file @
d5857823
...
...
@@ -5,7 +5,7 @@ import fs from 'fs';
import
path
from
'
path
'
;
import
*
as
component
from
'
common/component
'
;
import
{
getLogger
,
Logger
}
from
'
common/log
'
;
import
{
ExperimentConfig
,
DlcConfig
,
flatten
Config
}
from
'
common/experimentConfig
'
;
import
{
Dlc
Config
}
from
'
common/experimentConfig
'
;
import
{
ExperimentStartupInfo
}
from
'
common/experimentStartupInfo
'
;
import
{
DlcClient
}
from
'
../dlc/dlcClient
'
;
import
{
DlcEnvironmentInformation
}
from
'
../dlc/dlcConfig
'
;
...
...
@@ -16,8 +16,6 @@ import { MountedStorageService } from '../storages/mountedStorageService';
import
{
Scope
}
from
'
typescript-ioc
'
;
import
{
StorageService
}
from
'
../storageService
'
;
interface
FlattenDlcConfig
extends
ExperimentConfig
,
DlcConfig
{
}
/**
* Collector DLC jobs info from DLC cluster, and update dlc job status locally
*/
...
...
@@ -26,12 +24,12 @@ export class DlcEnvironmentService extends EnvironmentService {
private
readonly
log
:
Logger
=
getLogger
(
'
dlcEnvironmentService
'
);
private
experimentId
:
string
;
private
config
:
Flatten
DlcConfig
;
private
config
:
DlcConfig
;
constructor
(
config
:
Experiment
Config
,
info
:
ExperimentStartupInfo
)
{
constructor
(
config
:
Dlc
Config
,
info
:
ExperimentStartupInfo
)
{
super
();
this
.
experimentId
=
info
.
experimentId
;
this
.
config
=
flattenConfig
(
config
,
'
dlc
'
)
;
this
.
config
=
config
;
component
.
Container
.
bind
(
StorageService
).
to
(
MountedStorageService
).
scope
(
Scope
.
Singleton
);
const
storageService
=
component
.
get
<
StorageService
>
(
StorageService
)
const
remoteRoot
=
storageService
.
joinPath
(
this
.
config
.
localStorageMountPoint
,
'
nni-experiments
'
,
this
.
experimentId
);
...
...
ts/nni_manager/training_service/reusable/environments/environmentServiceFactory.ts
View file @
d5857823
...
...
@@ -13,22 +13,23 @@ import { DlcEnvironmentService } from './dlcEnvironmentService';
export
async
function
createEnvironmentService
(
name
:
string
,
config
:
ExperimentConfig
):
Promise
<
EnvironmentService
>
{
const
info
=
ExperimentStartupInfo
.
getInstance
();
const
tsConfig
:
any
=
config
.
trainingService
;
switch
(
name
)
{
switch
(
name
)
{
case
'
local
'
:
return
new
LocalEnvironmentService
(
c
onfig
,
info
);
return
new
LocalEnvironmentService
(
tsC
onfig
,
info
);
case
'
remote
'
:
return
new
RemoteEnvironmentService
(
c
onfig
,
info
);
return
new
RemoteEnvironmentService
(
tsC
onfig
,
info
);
case
'
aml
'
:
return
new
AMLEnvironmentService
(
c
onfig
,
info
);
return
new
AMLEnvironmentService
(
tsC
onfig
,
info
);
case
'
openpai
'
:
return
new
OpenPaiEnvironmentService
(
c
onfig
,
info
);
return
new
OpenPaiEnvironmentService
(
tsC
onfig
,
info
);
case
'
kubeflow
'
:
return
new
KubeflowEnvironmentService
(
c
onfig
,
info
);
return
new
KubeflowEnvironmentService
(
tsC
onfig
,
info
);
case
'
frameworkcontroller
'
:
return
new
FrameworkControllerEnvironmentService
(
c
onfig
,
info
);
return
new
FrameworkControllerEnvironmentService
(
tsC
onfig
,
info
);
case
'
dlc
'
:
return
new
DlcEnvironmentService
(
c
onfig
,
info
);
return
new
DlcEnvironmentService
(
tsC
onfig
,
info
);
}
const
esConfig
=
await
getCustomEnvironmentServiceConfig
(
name
);
...
...
@@ -37,5 +38,5 @@ export async function createEnvironmentService(name: string, config: ExperimentC
}
const
esModule
=
importModule
(
esConfig
.
nodeModulePath
);
const
esClass
=
esModule
[
esConfig
.
nodeClassName
]
as
any
;
return
new
esClass
(
c
onfig
,
info
);
return
new
esClass
(
tsC
onfig
,
info
);
}
ts/nni_manager/training_service/reusable/environments/kubernetes/frameworkcontrollerEnvironmentService.ts
View file @
d5857823
...
...
@@ -6,7 +6,7 @@
import
*
as
fs
from
'
fs
'
;
import
*
as
path
from
'
path
'
;
import
*
as
component
from
'
../../../../common/component
'
;
import
{
ExperimentConfig
,
FrameworkControllerConfig
,
flattenConfig
,
FrameworkControllerTaskRoleConfig
}
from
'
../../../../common/experimentConfig
'
;
import
{
FrameworkControllerConfig
,
FrameworkControllerTaskRoleConfig
,
toMegaBytes
}
from
'
../../../../common/experimentConfig
'
;
import
{
ExperimentStartupInfo
}
from
'
../../../../common/experimentStartupInfo
'
;
import
{
EnvironmentInformation
}
from
'
../../environment
'
;
import
{
KubernetesEnvironmentService
}
from
'
./kubernetesEnvironmentService
'
;
...
...
@@ -15,23 +15,20 @@ import { FrameworkControllerClusterConfigAzure, FrameworkControllerJobStatus, Fr
FrameworkControllerJobCompleteStatus
}
from
'
../../../kubernetes/frameworkcontroller/frameworkcontrollerConfig
'
;
import
{
KeyVaultConfig
,
AzureStorage
}
from
'
../../../kubernetes/kubernetesConfig
'
;
interface
FlattenKubeflowConfig
extends
ExperimentConfig
,
FrameworkControllerConfig
{
}
@
component
.
Singleton
export
class
FrameworkControllerEnvironmentService
extends
KubernetesEnvironmentService
{
private
config
:
F
lattenKubeflow
Config
;
private
config
:
F
rameworkController
Config
;
private
createStoragePromise
?:
Promise
<
void
>
;
private
readonly
fcContainerPortMap
:
Map
<
string
,
number
>
=
new
Map
<
string
,
number
>
();
// store frameworkcontroller container port
constructor
(
config
:
Experiment
Config
,
info
:
ExperimentStartupInfo
)
{
constructor
(
config
:
FrameworkController
Config
,
info
:
ExperimentStartupInfo
)
{
super
(
config
,
info
);
this
.
experimentId
=
info
.
experimentId
;
this
.
config
=
flattenConfig
(
config
,
'
frameworkcontroller
'
)
;
this
.
config
=
config
;
// Create kubernetesCRDClient
this
.
kubernetesCRDClient
=
FrameworkControllerClientFactory
.
createClient
(
this
.
config
.
namespace
);
this
.
kubernetesCRDClient
=
FrameworkControllerClientFactory
.
createClient
(
this
.
config
.
namespace
);
// Create storage
if
(
this
.
config
.
storage
.
storageType
===
'
azureStorage
'
)
{
if
(
this
.
config
.
storage
.
azureShare
===
undefined
||
...
...
@@ -40,27 +37,15 @@ export class FrameworkControllerEnvironmentService extends KubernetesEnvironment
this
.
config
.
storage
.
keyVaultKey
===
undefined
)
{
throw
new
Error
(
"
Azure storage configuration error!
"
);
}
const
azureStorage
:
AzureStorage
=
new
AzureStorage
(
this
.
config
.
storage
.
azureShare
,
this
.
config
.
storage
.
azureAccount
);
const
keyValutConfig
:
KeyVaultConfig
=
new
KeyVaultConfig
(
this
.
config
.
storage
.
keyVaultName
,
this
.
config
.
storage
.
keyVaultKey
);
const
azureKubeflowClusterConfig
:
FrameworkControllerClusterConfigAzure
=
new
FrameworkControllerClusterConfigAzure
(
this
.
config
.
namespace
,
this
.
config
.
apiVersion
,
keyValutConfig
,
azureStorage
);
this
.
azureStorageAccountName
=
azureKubeflowClusterConfig
.
azureStorage
.
accountName
;
this
.
azureStorageShare
=
azureKubeflowClusterConfig
.
azureStorage
.
azureShare
;
this
.
createStoragePromise
=
this
.
createAzureStorage
(
azureKubeflowClusterConfig
.
keyVault
.
vaultName
,
azureKubeflowClusterConfig
.
keyVault
.
name
);
this
.
azureStorageAccountName
=
this
.
config
.
storage
.
azureAccount
;
this
.
azureStorageShare
=
this
.
config
.
storage
.
azureShare
;
this
.
createStoragePromise
=
this
.
createAzureStorage
(
this
.
config
.
storage
.
keyVaultName
,
this
.
config
.
storage
.
keyVaultKey
);
}
else
if
(
this
.
config
.
storage
.
storageType
===
'
nfs
'
)
{
if
(
this
.
config
.
storage
.
server
===
undefined
||
this
.
config
.
storage
.
path
===
undefined
)
{
throw
new
Error
(
"
NFS storage configuration error!
"
);
}
this
.
createStoragePromise
=
this
.
createNFSStorage
(
this
.
config
.
storage
.
server
,
this
.
config
.
storage
.
path
);
this
.
createStoragePromise
=
this
.
createNFSStorage
(
this
.
config
.
storage
.
server
,
this
.
config
.
storage
.
path
);
}
}
...
...
@@ -91,9 +76,6 @@ export class FrameworkControllerEnvironmentService extends KubernetesEnvironment
const
expFolder
=
`
${
this
.
CONTAINER_MOUNT_PATH
}
/nni/
${
this
.
experimentId
}
`
;
environment
.
command
=
`cd
${
expFolder
}
&&
${
environment
.
command
}
\
1>
${
expFolder
}
/envs/
${
environment
.
id
}
/trialrunner_stdout 2>
${
expFolder
}
/envs/
${
environment
.
id
}
/trialrunner_stderr`
;
if
(
this
.
config
.
deprecated
&&
this
.
config
.
deprecated
.
useActiveGpu
!==
undefined
)
{
environment
.
useActiveGpu
=
this
.
config
.
deprecated
.
useActiveGpu
;
}
environment
.
maxTrialNumberPerGpu
=
this
.
config
.
maxTrialNumberPerGpu
;
const
frameworkcontrollerJobName
:
string
=
`nniexp
${
this
.
experimentId
}
env
${
environment
.
id
}
`
.
toLowerCase
();
...
...
@@ -148,7 +130,7 @@ export class FrameworkControllerEnvironmentService extends KubernetesEnvironment
const
podResources
:
any
=
[];
for
(
const
taskRole
of
this
.
config
.
taskRoles
)
{
const
resource
:
any
=
{};
resource
.
requests
=
this
.
generatePodResource
(
taskRole
.
memorySize
,
taskRole
.
cpuNumber
,
taskRole
.
gpuNumber
);
resource
.
requests
=
this
.
generatePodResource
(
toMegaBytes
(
taskRole
.
memorySize
)
,
taskRole
.
cpuNumber
,
taskRole
.
gpuNumber
);
resource
.
limits
=
{...
resource
.
requests
};
podResources
.
push
(
resource
);
}
...
...
ts/nni_manager/training_service/reusable/environments/kubernetes/kubeflowEnvironmentService.ts
View file @
d5857823
...
...
@@ -4,7 +4,7 @@
import
fs
from
'
fs
'
;
import
path
from
'
path
'
;
import
*
as
component
from
'
common/component
'
;
import
{
ExperimentConfig
,
KubeflowConfig
,
flattenConfig
}
from
'
common/experimentConfig
'
;
import
{
KubeflowConfig
,
toMegaBytes
}
from
'
common/experimentConfig
'
;
import
{
ExperimentStartupInfo
}
from
'
common/experimentStartupInfo
'
;
import
{
EnvironmentInformation
}
from
'
training_service/reusable/environment
'
;
import
{
KubernetesEnvironmentService
}
from
'
./kubernetesEnvironmentService
'
;
...
...
@@ -12,19 +12,17 @@ import { KubeflowOperatorClientFactory } from 'training_service/kubernetes/kubef
import
{
KubeflowClusterConfigAzure
}
from
'
training_service/kubernetes/kubeflow/kubeflowConfig
'
;
import
{
KeyVaultConfig
,
AzureStorage
}
from
'
training_service/kubernetes/kubernetesConfig
'
;
interface
FlattenKubeflowConfig
extends
ExperimentConfig
,
KubeflowConfig
{
}
@
component
.
Singleton
export
class
KubeflowEnvironmentService
extends
KubernetesEnvironmentService
{
private
config
:
Flatten
KubeflowConfig
;
private
config
:
KubeflowConfig
;
private
createStoragePromise
?:
Promise
<
void
>
;
constructor
(
config
:
Experiment
Config
,
info
:
ExperimentStartupInfo
)
{
constructor
(
config
:
Kubeflow
Config
,
info
:
ExperimentStartupInfo
)
{
super
(
config
,
info
);
this
.
experimentId
=
info
.
experimentId
;
this
.
config
=
flattenConfig
(
config
,
'
kubeflow
'
)
;
this
.
config
=
config
;
// Create kubernetesCRDClient
this
.
kubernetesCRDClient
=
KubeflowOperatorClientFactory
.
createClient
(
this
.
config
.
operator
,
this
.
config
.
apiVersion
);
...
...
@@ -82,9 +80,6 @@ export class KubeflowEnvironmentService extends KubernetesEnvironmentService {
const
expFolder
=
`
${
this
.
CONTAINER_MOUNT_PATH
}
/nni/
${
this
.
experimentId
}
`
;
environment
.
command
=
`cd
${
expFolder
}
&&
${
environment
.
command
}
\
1>
${
expFolder
}
/envs/
${
environment
.
id
}
/trialrunner_stdout 2>
${
expFolder
}
/envs/
${
environment
.
id
}
/trialrunner_stderr`
;
if
(
this
.
config
.
deprecated
&&
this
.
config
.
deprecated
.
useActiveGpu
!==
undefined
)
{
environment
.
useActiveGpu
=
this
.
config
.
deprecated
.
useActiveGpu
;
}
environment
.
maxTrialNumberPerGpu
=
this
.
config
.
maxTrialNumberPerGpu
;
const
kubeflowJobName
:
string
=
`nniexp
${
this
.
experimentId
}
env
${
environment
.
id
}
`
.
toLowerCase
();
...
...
@@ -118,22 +113,22 @@ export class KubeflowEnvironmentService extends KubernetesEnvironmentService {
private
async
prepareKubeflowConfig
(
envId
:
string
,
kubeflowJobName
:
string
):
Promise
<
any
>
{
const
workerPodResources
:
any
=
{};
if
(
this
.
config
.
worker
!==
undefined
)
{
workerPodResources
.
requests
=
this
.
generatePodResource
(
this
.
config
.
worker
.
memorySize
,
this
.
config
.
worker
.
cpuNumber
,
this
.
config
.
worker
.
gpuNumber
);
workerPodResources
.
requests
=
this
.
generatePodResource
(
toMegaBytes
(
this
.
config
.
worker
.
memorySize
)
,
this
.
config
.
worker
.
cpuNumber
,
this
.
config
.
worker
.
gpuNumber
);
}
workerPodResources
.
limits
=
{...
workerPodResources
.
requests
};
const
nonWorkerResources
:
any
=
{};
if
(
this
.
config
.
operator
===
'
tf-operator
'
)
{
if
(
this
.
config
.
ps
!==
undefined
)
{
nonWorkerResources
.
requests
=
this
.
generatePodResource
(
this
.
config
.
ps
.
memorySize
,
this
.
config
.
ps
.
cpuNumber
,
this
.
config
.
ps
.
gpuNumber
);
nonWorkerResources
.
requests
=
this
.
generatePodResource
(
toMegaBytes
(
this
.
config
.
ps
.
memorySize
)
,
this
.
config
.
ps
.
cpuNumber
,
this
.
config
.
ps
.
gpuNumber
);
nonWorkerResources
.
limits
=
{...
nonWorkerResources
.
requests
};
}
}
else
if
(
this
.
config
.
operator
===
'
pytorch-operator
'
)
{
if
(
this
.
config
.
master
!==
undefined
)
{
nonWorkerResources
.
requests
=
this
.
generatePodResource
(
this
.
config
.
master
.
memorySize
,
this
.
config
.
master
.
cpuNumber
,
this
.
config
.
master
.
gpuNumber
);
nonWorkerResources
.
requests
=
this
.
generatePodResource
(
toMegaBytes
(
this
.
config
.
master
.
memorySize
)
,
this
.
config
.
master
.
cpuNumber
,
this
.
config
.
master
.
gpuNumber
);
nonWorkerResources
.
limits
=
{...
nonWorkerResources
.
requests
};
}
}
...
...
ts/nni_manager/training_service/reusable/environments/kubernetes/kubernetesEnvironmentService.ts
View file @
d5857823
...
...
@@ -33,7 +33,7 @@ export class KubernetesEnvironmentService extends EnvironmentService {
protected
log
:
Logger
=
getLogger
(
'
KubernetesEnvironmentService
'
);
protected
environmentWorkingFolder
:
string
;
constructor
(
_config
:
ExperimentConfig
,
info
:
ExperimentStartupInfo
)
{
constructor
(
_config
:
any
,
info
:
ExperimentStartupInfo
)
{
super
();
this
.
CONTAINER_MOUNT_PATH
=
'
/tmp/mount
'
;
this
.
genericK8sClient
=
new
GeneralK8sClient
();
...
...
ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts
View file @
d5857823
...
...
@@ -6,7 +6,7 @@ import request from 'request';
import
{
Container
,
Scope
}
from
'
typescript-ioc
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
*
as
component
from
'
common/component
'
;
import
{
ExperimentConfig
,
OpenpaiConfig
,
flatten
Config
,
toMegaBytes
}
from
'
common/experimentConfig
'
;
import
{
Openpai
Config
,
toMegaBytes
}
from
'
common/experimentConfig
'
;
import
{
ExperimentStartupInfo
}
from
'
common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
}
from
'
common/log
'
;
import
{
PAIClusterConfig
}
from
'
training_service/pai/paiConfig
'
;
...
...
@@ -16,8 +16,6 @@ import { SharedStorageService } from '../sharedStorage';
import
{
MountedStorageService
}
from
'
../storages/mountedStorageService
'
;
import
{
StorageService
}
from
'
../storageService
'
;
interface
FlattenOpenpaiConfig
extends
ExperimentConfig
,
OpenpaiConfig
{
}
/**
* Collector PAI jobs info from PAI cluster, and update pai job status locally
*/
...
...
@@ -30,12 +28,12 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
private
paiToken
:
string
;
private
protocol
:
string
;
private
experimentId
:
string
;
private
config
:
Flatten
OpenpaiConfig
;
private
config
:
OpenpaiConfig
;
constructor
(
config
:
Experiment
Config
,
info
:
ExperimentStartupInfo
)
{
constructor
(
config
:
Openpai
Config
,
info
:
ExperimentStartupInfo
)
{
super
();
this
.
experimentId
=
info
.
experimentId
;
this
.
config
=
flattenConfig
(
config
,
'
openpai
'
)
;
this
.
config
=
config
;
this
.
paiToken
=
this
.
config
.
token
;
this
.
protocol
=
this
.
config
.
host
.
toLowerCase
().
startsWith
(
'
https://
'
)
?
'
https
'
:
'
http
'
;
Container
.
bind
(
StorageService
)
...
...
ts/nni_manager/training_service/reusable/environments/remoteEnvironmentService.ts
View file @
d5857823
...
...
@@ -7,7 +7,7 @@ import * as component from 'common/component';
import
{
getLogger
,
Logger
}
from
'
common/log
'
;
import
{
EnvironmentInformation
,
EnvironmentService
}
from
'
../environment
'
;
import
{
getLogLevel
}
from
'
common/utils
'
;
import
{
ExperimentConfig
,
RemoteConfig
,
RemoteMachineConfig
,
flattenConfig
}
from
'
common/experimentConfig
'
;
import
{
RemoteConfig
,
RemoteMachineConfig
}
from
'
common/experimentConfig
'
;
import
{
ExperimentStartupInfo
}
from
'
common/experimentStartupInfo
'
;
import
{
execMkdir
}
from
'
training_service/common/util
'
;
import
{
ExecutorManager
}
from
'
training_service/remote_machine/remoteMachineData
'
;
...
...
@@ -15,8 +15,6 @@ import { ShellExecutor } from 'training_service/remote_machine/shellExecutor';
import
{
RemoteMachineEnvironmentInformation
}
from
'
../remote/remoteConfig
'
;
import
{
SharedStorageService
}
from
'
../sharedStorage
'
interface
FlattenRemoteConfig
extends
ExperimentConfig
,
RemoteConfig
{
}
@
component
.
Singleton
export
class
RemoteEnvironmentService
extends
EnvironmentService
{
...
...
@@ -29,9 +27,9 @@ export class RemoteEnvironmentService extends EnvironmentService {
private
experimentRootDir
:
string
;
private
remoteExperimentRootDir
:
string
=
""
;
private
experimentId
:
string
;
private
config
:
Flatten
RemoteConfig
;
private
config
:
RemoteConfig
;
constructor
(
config
:
Experiment
Config
,
info
:
ExperimentStartupInfo
)
{
constructor
(
config
:
Remote
Config
,
info
:
ExperimentStartupInfo
)
{
super
();
this
.
experimentId
=
info
.
experimentId
;
this
.
environmentExecutorManagerMap
=
new
Map
<
string
,
ExecutorManager
>
();
...
...
@@ -39,7 +37,7 @@ export class RemoteEnvironmentService extends EnvironmentService {
this
.
remoteMachineMetaOccupiedMap
=
new
Map
<
RemoteMachineConfig
,
boolean
>
();
this
.
experimentRootDir
=
info
.
logDir
;
this
.
log
=
getLogger
(
'
RemoteEnvironmentService
'
);
this
.
config
=
flattenConfig
(
config
,
'
remote
'
)
;
this
.
config
=
config
;
// codeDir is not a valid directory, throw Error
if
(
!
fs
.
lstatSync
(
this
.
config
.
trialCodeDirectory
).
isDirectory
())
{
...
...
ts/nni_manager/training_service/reusable/routerTrainingService.ts
View file @
d5857823
...
...
@@ -26,9 +26,9 @@ class RouterTrainingService implements TrainingService {
instance
.
log
=
getLogger
(
'
RouterTrainingService
'
);
const
platform
=
Array
.
isArray
(
config
.
trainingService
)
?
'
hybrid
'
:
config
.
trainingService
.
platform
;
if
(
platform
===
'
remote
'
&&
(
<
RemoteConfig
>
config
.
trainingService
).
reuseMode
===
false
)
{
instance
.
internalTrainingService
=
new
RemoteMachineTrainingService
(
config
);
instance
.
internalTrainingService
=
new
RemoteMachineTrainingService
(
<
RemoteConfig
>
config
.
trainingService
);
}
else
if
(
platform
===
'
openpai
'
&&
(
<
OpenpaiConfig
>
config
.
trainingService
).
reuseMode
===
false
)
{
instance
.
internalTrainingService
=
new
PAITrainingService
(
config
);
instance
.
internalTrainingService
=
new
PAITrainingService
(
<
OpenpaiConfig
>
config
.
trainingService
);
}
else
if
(
platform
===
'
kubeflow
'
&&
(
<
KubeflowConfig
>
config
.
trainingService
).
reuseMode
===
false
)
{
instance
.
internalTrainingService
=
new
KubeflowTrainingService
();
}
else
if
(
platform
===
'
frameworkcontroller
'
&&
(
<
FrameworkControllerConfig
>
config
.
trainingService
).
reuseMode
===
false
)
{
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment