Unverified commit 91a68821, authored by Yuge Zhang, committed by GitHub
Browse files

Refactor integration test (step 2) - AML authentication and debuggability (#4893)

parent 73687a66
......@@ -5,11 +5,14 @@ import os
import sys
import time
import json
import warnings
from argparse import ArgumentParser
from azureml.core import Experiment, RunConfiguration, ScriptRunConfig
from azureml.core import Experiment, RunConfiguration, ScriptRunConfig, Workspace
from azureml.core.authentication import (
AzureCliAuthentication, InteractiveLoginAuthentication, AuthenticationException
)
from azureml.core.compute import ComputeTarget
from azureml.core.run import RUNNING_STATES, RunStatus, Run
from azureml.core import Workspace
from azureml.core.conda_dependencies import CondaDependencies
if __name__ == "__main__":
......@@ -24,7 +27,18 @@ if __name__ == "__main__":
parser.add_argument('--script_name', help='script name')
args = parser.parse_args()
ws = Workspace(args.subscription_id, args.resource_group, args.workspace_name)
try:
auth = AzureCliAuthentication()
auth.get_token()
except AuthenticationException as e:
warnings.warn(
f'Azure-cli authentication failed: {e}',
RuntimeWarning
)
warnings.warn('Falling back to interactive authentication.', RuntimeWarning)
auth = InteractiveLoginAuthentication()
ws = Workspace(args.subscription_id, args.resource_group, args.workspace_name, auth=auth)
compute_target = ComputeTarget(workspace=ws, name=args.compute_target)
experiment = Experiment(ws, args.experiment_name)
run_config = RunConfiguration()
......
......@@ -728,6 +728,7 @@ class NNIManager implements Manager {
throw NNIError.FromError(err, 'Dispatcher error: ');
}),
this.trainingService.run().catch((err: Error) => {
// FIXME: The error handling here could crash when err is undefined.
throw NNIError.FromError(err, 'Training service error: ');
}),
this.manageTrials().catch((err: Error) => {
......@@ -870,10 +871,15 @@ class NNIManager implements Manager {
}
private logError(err: Error): void {
if (err !== undefined) {
// FIXME: I don't know why, but in some cases err could be undefined.
if (err.stack !== undefined) {
this.log.error(err.stack);
}
this.status.errors.push(err.message);
} else {
this.status.errors.push(`Undefined error, stack: ${new Error().stack}`);
}
}
this.setEndtime();
this.setStatus('ERROR');
}
......
......@@ -135,12 +135,18 @@ export class AMLClient {
// Monitor error information in aml python shell client
private monitorError(pythonShellClient: PythonShell, deferred: Deferred<any>): void {
pythonShellClient.on('error', function (error: any) {
deferred.reject(error);
pythonShellClient.on('stderr', function (chunk: any) {
// FIXME: The error will only appear in console.
// Still need to find a way to put them into logs.
console.error(`Python process stderr: ${chunk}`);
});
pythonShellClient.on('close', function (error: any) {
pythonShellClient.on('error', function (error: Error) {
console.error(`Python process fires error: ${error}`);
deferred.reject(error);
});
pythonShellClient.on('close', function () {
deferred.reject(new Error('AML client Python process unknown error.'));
});
}
// Parse command content, command format is {head}:{content}
......
......@@ -109,6 +109,7 @@ export class AMLEnvironmentService extends EnvironmentService {
'nni_script.py',
environmentLocalTempFolder
);
this.log.debug('aml: before amlClient submit');
amlEnvironment.id = await amlClient.submit();
this.log.debug('aml: before getTrackingUrl');
amlEnvironment.trackingUrl = await amlClient.getTrackingUrl();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment