".github/git@developer.sourcefind.cn:OpenDAS/tilelang.git" did not exist on "4eba852ac928ba061b3696ca123aea180387d92f"
Unverified commit b42f85f5, authored by SparkSnail and committed by GitHub
Browse files

Fix kubeflow & frameworkcontroller pipeline (#3991)

parent aa2cc922
...@@ -128,5 +128,3 @@ def validate_all_content(experiment_config, config_path): ...@@ -128,5 +128,3 @@ def validate_all_content(experiment_config, config_path):
if 'maxExecDuration' in experiment_config: if 'maxExecDuration' in experiment_config:
experiment_config['maxExecDuration'] = parse_time(experiment_config['maxExecDuration']) experiment_config['maxExecDuration'] = parse_time(experiment_config['maxExecDuration'])
if 'maxTrialDuration' in experiment_config:
experiment_config['maxTrialDuration'] = parse_time(experiment_config['maxTrialDuration'])
...@@ -13,6 +13,9 @@ def rest_put(url, data, timeout, show_error=False): ...@@ -13,6 +13,9 @@ def rest_put(url, data, timeout, show_error=False):
response = requests.put(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\ response = requests.put(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\
data=data, timeout=timeout) data=data, timeout=timeout)
return response return response
except requests.exceptions.Timeout:
print_error("Connect %s timeout." % url)
return None
except Exception as exception: except Exception as exception:
if show_error: if show_error:
print_error(exception) print_error(exception)
...@@ -24,6 +27,9 @@ def rest_post(url, data, timeout, show_error=False): ...@@ -24,6 +27,9 @@ def rest_post(url, data, timeout, show_error=False):
response = requests.post(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\ response = requests.post(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\
data=data, timeout=timeout) data=data, timeout=timeout)
return response return response
except requests.exceptions.Timeout:
print_error("Connect %s timeout." % url)
return None
except Exception as exception: except Exception as exception:
if show_error: if show_error:
print_error(exception) print_error(exception)
...@@ -34,6 +40,9 @@ def rest_get(url, timeout, show_error=False): ...@@ -34,6 +40,9 @@ def rest_get(url, timeout, show_error=False):
try: try:
response = requests.get(url, timeout=timeout) response = requests.get(url, timeout=timeout)
return response return response
except requests.exceptions.Timeout:
print_error("Connect %s timeout." % url)
return None
except Exception as exception: except Exception as exception:
if show_error: if show_error:
print_error(exception) print_error(exception)
...@@ -44,6 +53,9 @@ def rest_delete(url, timeout, show_error=False): ...@@ -44,6 +53,9 @@ def rest_delete(url, timeout, show_error=False):
try: try:
response = requests.delete(url, timeout=timeout) response = requests.delete(url, timeout=timeout)
return response return response
except requests.exceptions.Timeout:
print_error("Connect %s timeout." % url)
return None
except Exception as exception: except Exception as exception:
if show_error: if show_error:
print_error(exception) print_error(exception)
......
...@@ -457,6 +457,9 @@ class NNIManager implements Manager { ...@@ -457,6 +457,9 @@ class NNIManager implements Manager {
} else if (platform === 'local') { } else if (platform === 'local') {
const module_ = await import('../training_service/local/localTrainingService'); const module_ = await import('../training_service/local/localTrainingService');
return new module_.LocalTrainingService(config); return new module_.LocalTrainingService(config);
} else if (platform === 'kubeflow') {
const module_ = await import('../training_service/kubernetes/kubeflow/kubeflowTrainingService');
return new module_.KubeflowTrainingService();
} else if (platform === 'frameworkcontroller') { } else if (platform === 'frameworkcontroller') {
const module_ = await import('../training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService'); const module_ = await import('../training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService');
return new module_.FrameworkControllerTrainingService(); return new module_.FrameworkControllerTrainingService();
......
...@@ -21,7 +21,8 @@ import { MetricType } from '../common/datastore'; ...@@ -21,7 +21,8 @@ import { MetricType } from '../common/datastore';
import { ProfileUpdateType } from '../common/manager'; import { ProfileUpdateType } from '../common/manager';
import { TrialJobStatus } from '../common/trainingService'; import { TrialJobStatus } from '../common/trainingService';
const expressJoi = require('express-joi-validator'); // TODO: fix expressJoi
//const expressJoi = require('express-joi-validator');
class NNIRestHandler { class NNIRestHandler {
private restServer: NNIRestServer; private restServer: NNIRestServer;
...@@ -205,7 +206,7 @@ class NNIRestHandler { ...@@ -205,7 +206,7 @@ class NNIRestHandler {
private setClusterMetaData(router: Router): void { private setClusterMetaData(router: Router): void {
router.put( router.put(
'/experiment/cluster-metadata', expressJoi(ValidationSchemas.SETCLUSTERMETADATA), '/experiment/cluster-metadata', //TODO: Fix validation expressJoi(ValidationSchemas.SETCLUSTERMETADATA),
async (req: Request, res: Response) => { async (req: Request, res: Response) => {
const metadata: any = req.body; const metadata: any = req.body;
const keys: string[] = Object.keys(metadata); const keys: string[] = Object.keys(metadata);
......
...@@ -226,6 +226,7 @@ export namespace ValidationSchemas { ...@@ -226,6 +226,7 @@ export namespace ValidationSchemas {
trainingServicePlatform: joi.string(), trainingServicePlatform: joi.string(),
searchSpace: joi.string().required(), searchSpace: joi.string().required(),
maxExecDuration: joi.number().min(0).required(), maxExecDuration: joi.number().min(0).required(),
maxTrialDuration: joi.number().min(0).required(),
multiPhase: joi.boolean(), multiPhase: joi.boolean(),
multiThread: joi.boolean(), multiThread: joi.boolean(),
nniManagerIp: joi.string(), nniManagerIp: joi.string(),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment