Unverified commit 91a68821, authored by Yuge Zhang and committed by GitHub
Browse files

Refactor integration test (step 2) - AML authentication and debuggability (#4893)

parent 73687a66
...@@ -5,11 +5,14 @@ import os ...@@ -5,11 +5,14 @@ import os
import sys import sys
import time import time
import json import json
import warnings
from argparse import ArgumentParser from argparse import ArgumentParser
from azureml.core import Experiment, RunConfiguration, ScriptRunConfig from azureml.core import Experiment, RunConfiguration, ScriptRunConfig, Workspace
from azureml.core.authentication import (
AzureCliAuthentication, InteractiveLoginAuthentication, AuthenticationException
)
from azureml.core.compute import ComputeTarget from azureml.core.compute import ComputeTarget
from azureml.core.run import RUNNING_STATES, RunStatus, Run from azureml.core.run import RUNNING_STATES, RunStatus, Run
from azureml.core import Workspace
from azureml.core.conda_dependencies import CondaDependencies from azureml.core.conda_dependencies import CondaDependencies
if __name__ == "__main__": if __name__ == "__main__":
...@@ -24,7 +27,18 @@ if __name__ == "__main__": ...@@ -24,7 +27,18 @@ if __name__ == "__main__":
parser.add_argument('--script_name', help='script name') parser.add_argument('--script_name', help='script name')
args = parser.parse_args() args = parser.parse_args()
ws = Workspace(args.subscription_id, args.resource_group, args.workspace_name) try:
auth = AzureCliAuthentication()
auth.get_token()
except AuthenticationException as e:
warnings.warn(
f'Azure-cli authentication failed: {e}',
RuntimeWarning
)
warnings.warn('Falling back to interactive authentication.', RuntimeWarning)
auth = InteractiveLoginAuthentication()
ws = Workspace(args.subscription_id, args.resource_group, args.workspace_name, auth=auth)
compute_target = ComputeTarget(workspace=ws, name=args.compute_target) compute_target = ComputeTarget(workspace=ws, name=args.compute_target)
experiment = Experiment(ws, args.experiment_name) experiment = Experiment(ws, args.experiment_name)
run_config = RunConfiguration() run_config = RunConfiguration()
......
...@@ -728,6 +728,7 @@ class NNIManager implements Manager { ...@@ -728,6 +728,7 @@ class NNIManager implements Manager {
throw NNIError.FromError(err, 'Dispatcher error: '); throw NNIError.FromError(err, 'Dispatcher error: ');
}), }),
this.trainingService.run().catch((err: Error) => { this.trainingService.run().catch((err: Error) => {
// FIXME: The error handling here could crash when err is undefined.
throw NNIError.FromError(err, 'Training service error: '); throw NNIError.FromError(err, 'Training service error: ');
}), }),
this.manageTrials().catch((err: Error) => { this.manageTrials().catch((err: Error) => {
...@@ -870,10 +871,15 @@ class NNIManager implements Manager { ...@@ -870,10 +871,15 @@ class NNIManager implements Manager {
} }
private logError(err: Error): void { private logError(err: Error): void {
if (err.stack !== undefined) { if (err !== undefined) {
this.log.error(err.stack); // FIXME: I don't know why, but in some cases err could be undefined.
if (err.stack !== undefined) {
this.log.error(err.stack);
this.status.errors.push(err.message);
} else {
this.status.errors.push(`Undefined error, stack: ${new Error().stack}`);
}
} }
this.status.errors.push(err.message);
this.setEndtime(); this.setEndtime();
this.setStatus('ERROR'); this.setStatus('ERROR');
} }
......
...@@ -135,12 +135,18 @@ export class AMLClient { ...@@ -135,12 +135,18 @@ export class AMLClient {
// Monitor error information in aml python shell client // Monitor error information in aml python shell client
private monitorError(pythonShellClient: PythonShell, deferred: Deferred<any>): void { private monitorError(pythonShellClient: PythonShell, deferred: Deferred<any>): void {
pythonShellClient.on('error', function (error: any) { pythonShellClient.on('stderr', function (chunk: any) {
deferred.reject(error); // FIXME: The error will only appear in console.
// Still need to find a way to put them into logs.
console.error(`Python process stderr: ${chunk}`);
}); });
pythonShellClient.on('close', function (error: any) { pythonShellClient.on('error', function (error: Error) {
console.error(`Python process fires error: ${error}`);
deferred.reject(error); deferred.reject(error);
}); });
pythonShellClient.on('close', function () {
deferred.reject(new Error('AML client Python process unknown error.'));
});
} }
// Parse command content, command format is {head}:{content} // Parse command content, command format is {head}:{content}
......
...@@ -109,6 +109,7 @@ export class AMLEnvironmentService extends EnvironmentService { ...@@ -109,6 +109,7 @@ export class AMLEnvironmentService extends EnvironmentService {
'nni_script.py', 'nni_script.py',
environmentLocalTempFolder environmentLocalTempFolder
); );
this.log.debug('aml: before amlClient submit');
amlEnvironment.id = await amlClient.submit(); amlEnvironment.id = await amlClient.submit();
this.log.debug('aml: before getTrackingUrl'); this.log.debug('aml: before getTrackingUrl');
amlEnvironment.trackingUrl = await amlClient.getTrackingUrl(); amlEnvironment.trackingUrl = await amlClient.getTrackingUrl();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment