Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
91a68821
Unverified
Commit
91a68821
authored
Jun 05, 2022
by
Yuge Zhang
Committed by
GitHub
Jun 05, 2022
Browse files
Refactor integration test (step 2) - AML authentication and debuggability (#4893)
parent
73687a66
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
36 additions
and
9 deletions
+36
-9
ts/nni_manager/config/aml/amlUtil.py
ts/nni_manager/config/aml/amlUtil.py
+17
-3
ts/nni_manager/core/nnimanager.ts
ts/nni_manager/core/nnimanager.ts
+9
-3
ts/nni_manager/training_service/reusable/aml/amlClient.ts
ts/nni_manager/training_service/reusable/aml/amlClient.ts
+9
-3
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
...ng_service/reusable/environments/amlEnvironmentService.ts
+1
-0
No files found.
ts/nni_manager/config/aml/amlUtil.py
View file @
91a68821
...
@@ -5,11 +5,14 @@ import os
...
@@ -5,11 +5,14 @@ import os
import
sys
import
sys
import
time
import
time
import
json
import
json
import
warnings
from
argparse
import
ArgumentParser
from
argparse
import
ArgumentParser
from
azureml.core
import
Experiment
,
RunConfiguration
,
ScriptRunConfig
from
azureml.core
import
Experiment
,
RunConfiguration
,
ScriptRunConfig
,
Workspace
from
azureml.core.authentication
import
(
AzureCliAuthentication
,
InteractiveLoginAuthentication
,
AuthenticationException
)
from
azureml.core.compute
import
ComputeTarget
from
azureml.core.compute
import
ComputeTarget
from
azureml.core.run
import
RUNNING_STATES
,
RunStatus
,
Run
from
azureml.core.run
import
RUNNING_STATES
,
RunStatus
,
Run
from
azureml.core
import
Workspace
from
azureml.core.conda_dependencies
import
CondaDependencies
from
azureml.core.conda_dependencies
import
CondaDependencies
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
@@ -24,7 +27,18 @@ if __name__ == "__main__":
...
@@ -24,7 +27,18 @@ if __name__ == "__main__":
parser
.
add_argument
(
'--script_name'
,
help
=
'script name'
)
parser
.
add_argument
(
'--script_name'
,
help
=
'script name'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
ws
=
Workspace
(
args
.
subscription_id
,
args
.
resource_group
,
args
.
workspace_name
)
try
:
auth
=
AzureCliAuthentication
()
auth
.
get_token
()
except
AuthenticationException
as
e
:
warnings
.
warn
(
f
'Azure-cli authentication failed:
{
e
}
'
,
RuntimeWarning
)
warnings
.
warn
(
'Falling back to interactive authentication.'
,
RuntimeWarning
)
auth
=
InteractiveLoginAuthentication
()
ws
=
Workspace
(
args
.
subscription_id
,
args
.
resource_group
,
args
.
workspace_name
,
auth
=
auth
)
compute_target
=
ComputeTarget
(
workspace
=
ws
,
name
=
args
.
compute_target
)
compute_target
=
ComputeTarget
(
workspace
=
ws
,
name
=
args
.
compute_target
)
experiment
=
Experiment
(
ws
,
args
.
experiment_name
)
experiment
=
Experiment
(
ws
,
args
.
experiment_name
)
run_config
=
RunConfiguration
()
run_config
=
RunConfiguration
()
...
...
ts/nni_manager/core/nnimanager.ts
View file @
91a68821
...
@@ -728,6 +728,7 @@ class NNIManager implements Manager {
...
@@ -728,6 +728,7 @@ class NNIManager implements Manager {
throw
NNIError
.
FromError
(
err
,
'
Dispatcher error:
'
);
throw
NNIError
.
FromError
(
err
,
'
Dispatcher error:
'
);
}),
}),
this
.
trainingService
.
run
().
catch
((
err
:
Error
)
=>
{
this
.
trainingService
.
run
().
catch
((
err
:
Error
)
=>
{
// FIXME: The error handling here could crash when err is undefined.
throw
NNIError
.
FromError
(
err
,
'
Training service error:
'
);
throw
NNIError
.
FromError
(
err
,
'
Training service error:
'
);
}),
}),
this
.
manageTrials
().
catch
((
err
:
Error
)
=>
{
this
.
manageTrials
().
catch
((
err
:
Error
)
=>
{
...
@@ -870,10 +871,15 @@ class NNIManager implements Manager {
...
@@ -870,10 +871,15 @@ class NNIManager implements Manager {
}
}
private
logError
(
err
:
Error
):
void
{
private
logError
(
err
:
Error
):
void
{
if
(
err
.
stack
!==
undefined
)
{
if
(
err
!==
undefined
)
{
this
.
log
.
error
(
err
.
stack
);
// FIXME: I don't know why, but in some cases err could be undefined.
if
(
err
.
stack
!==
undefined
)
{
this
.
log
.
error
(
err
.
stack
);
this
.
status
.
errors
.
push
(
err
.
message
);
}
else
{
this
.
status
.
errors
.
push
(
`Undefined error, stack:
${
new
Error
().
stack
}
`
);
}
}
}
this
.
status
.
errors
.
push
(
err
.
message
);
this
.
setEndtime
();
this
.
setEndtime
();
this
.
setStatus
(
'
ERROR
'
);
this
.
setStatus
(
'
ERROR
'
);
}
}
...
...
ts/nni_manager/training_service/reusable/aml/amlClient.ts
View file @
91a68821
...
@@ -135,12 +135,18 @@ export class AMLClient {
...
@@ -135,12 +135,18 @@ export class AMLClient {
// Monitor error information in aml python shell client
// Monitor error information in aml python shell client
private
monitorError
(
pythonShellClient
:
PythonShell
,
deferred
:
Deferred
<
any
>
):
void
{
private
monitorError
(
pythonShellClient
:
PythonShell
,
deferred
:
Deferred
<
any
>
):
void
{
pythonShellClient
.
on
(
'
error
'
,
function
(
error
:
any
)
{
pythonShellClient
.
on
(
'
stderr
'
,
function
(
chunk
:
any
)
{
deferred
.
reject
(
error
);
// FIXME: The error will only appear in console.
// Still need to find a way to put them into logs.
console
.
error
(
`Python process stderr:
${
chunk
}
`
);
});
});
pythonShellClient
.
on
(
'
close
'
,
function
(
error
:
any
)
{
pythonShellClient
.
on
(
'
error
'
,
function
(
error
:
Error
)
{
console
.
error
(
`Python process fires error:
${
error
}
`
);
deferred
.
reject
(
error
);
deferred
.
reject
(
error
);
});
});
pythonShellClient
.
on
(
'
close
'
,
function
()
{
deferred
.
reject
(
new
Error
(
'
AML client Python process unknown error.
'
));
});
}
}
// Parse command content, command format is {head}:{content}
// Parse command content, command format is {head}:{content}
...
...
ts/nni_manager/training_service/reusable/environments/amlEnvironmentService.ts
View file @
91a68821
...
@@ -109,6 +109,7 @@ export class AMLEnvironmentService extends EnvironmentService {
...
@@ -109,6 +109,7 @@ export class AMLEnvironmentService extends EnvironmentService {
'
nni_script.py
'
,
'
nni_script.py
'
,
environmentLocalTempFolder
environmentLocalTempFolder
);
);
this
.
log
.
debug
(
'
aml: before amlClient submit
'
);
amlEnvironment
.
id
=
await
amlClient
.
submit
();
amlEnvironment
.
id
=
await
amlClient
.
submit
();
this
.
log
.
debug
(
'
aml: before getTrackingUrl
'
);
this
.
log
.
debug
(
'
aml: before getTrackingUrl
'
);
amlEnvironment
.
trackingUrl
=
await
amlClient
.
getTrackingUrl
();
amlEnvironment
.
trackingUrl
=
await
amlClient
.
getTrackingUrl
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment