"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "af9d4eb038c9d8d6f86b043292134bba7ad66805"
Unverified Commit 12410686 authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Merge pull request #20 from microsoft/master

pull code
parents 611a45fc 61fec446
......@@ -63,9 +63,9 @@ def get_model(PARAMS):
if not model_dict.get(PARAMS['model_name']):
LOG.exception('Not supported model!')
exit(1)
model = model_dict[PARAMS['model_name']]
try:
if PARAMS['model_name'] == 'SVR':
model.kernel = PARAMS['svr_kernel']
......
......@@ -10,7 +10,7 @@ useAnnotation: false
multiThread: true
tuner:
codeDir: ../../../tuners/weight_sharing/ga_customer_tuner
classFileName: customer_tuner.py
classFileName: customer_tuner.py
className: CustomerTuner
classArgs:
optimize_mode: maximize
......@@ -23,9 +23,9 @@ trial:
machineList:
- ip: remote-ip-0
port: 8022
username: root
username: root
passwd: screencast
- ip: remote-ip-1
port: 8022
username: root
username: root
passwd: screencast
......@@ -241,9 +241,7 @@ def get_id(word_dict, word):
'''
Given word, return word id.
'''
if word in word_dict.keys():
return word_dict[word]
return word_dict['<unk>']
return word_dict.get(word, word_dict['<unk>'])
def get_buckets(min_length, max_length, bucket_count):
......
......@@ -290,7 +290,7 @@ def graph_to_network(input1,
if topo_i == '|':
continue
# Note: here we use the `hash_id` of layer as scope name,
# Note: here we use the `hash_id` of layer as scope name,
# so that we can automatically load sharable weights from previous trained models
with tf.variable_scope(p_graph.layers[topo_i].hash_id, reuse=tf.AUTO_REUSE):
if p_graph.layers[topo_i].graph_type == LayerType.input.value:
......
# How to use ga_customer_tuner?
This tuner is a customized tuner which is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad".
This tuner is a customized tuner which is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad".
Type `cd ~/nni/examples/trials/ga_squad` and check readme.md to get more information about the ga_squad trial.
# config
# config
If you want to use ga_customer_tuner in your experiment, you can set up the config file in the following format:
```
......
import numpy as np
from nni.tuner import Tuner
def random_archi_generator(nas_ss, random_state):
    '''Randomly sample one architecture from a NAS search space.

    Parameters
    ----------
    nas_ss : dict
        Search space laid out as ``{block_name: {layer_name: layer_spec}}``.
        A ``layer_spec`` may contain the keys ``layer_choice``,
        ``optional_inputs`` and ``optional_input_size``; any other key is
        rejected.
    random_state : numpy.random.RandomState
        Source of randomness; only its ``randint`` method is used.

    Returns
    -------
    dict
        ``{block_name: {layer_name: chosen}}`` where ``chosen`` holds
        ``chosen_layer`` (one entry of ``layer_choice``) and/or
        ``chosen_inputs`` (a list drawn from ``optional_inputs``), depending
        on which keys the layer spec provides.

    Raises
    ------
    ValueError
        If a layer spec contains an unknown key.
    KeyError
        If ``optional_inputs`` is non-empty but ``optional_input_size`` is
        missing from the same layer spec.
    '''
    # Imported locally so the block stays self-contained; diagnostics go to the
    # logging system instead of being printed to stdout.
    import logging
    logger = logging.getLogger(__name__)

    chosen_archi = {}
    logger.debug('nas search space: %s', nas_ss)
    for block_name, block in nas_ss.items():
        tmp_block = {}
        for layer_name, layer in block.items():
            tmp_layer = {}
            for key, value in layer.items():
                if key == 'layer_choice':
                    index = random_state.randint(len(value))
                    tmp_layer['chosen_layer'] = value[index]
                elif key == 'optional_inputs':
                    candidates = layer['optional_inputs']
                    tmp_layer['chosen_inputs'] = []
                    logger.debug('optional_inputs: %s', candidates)
                    if candidates:
                        size_spec = layer['optional_input_size']
                        if isinstance(size_spec, int):
                            choice_num = size_spec
                        else:
                            # size_spec is an inclusive [low, high] pair;
                            # randint's upper bound is exclusive, hence the +1.
                            choice_num = random_state.randint(size_spec[0], size_spec[1] + 1)
                        # Each input is drawn independently, so the same
                        # candidate may be selected more than once.
                        for _ in range(choice_num):
                            index = random_state.randint(len(candidates))
                            tmp_layer['chosen_inputs'].append(candidates[index])
                elif key == 'optional_input_size':
                    # Consumed together with 'optional_inputs' above.
                    pass
                else:
                    raise ValueError('Unknown field %s in layer %s of block %s' % (key, layer_name, block_name))
            tmp_block[layer_name] = tmp_layer
        chosen_archi[block_name] = tmp_block
    return chosen_archi
class RandomNASTuner(Tuner):
    '''Tuner that answers every parameter request with a randomly sampled architecture.'''

    def __init__(self):
        # Both fields are filled in by update_search_space().
        self.random_state = None
        self.searchspace_json = None

    def update_search_space(self, search_space):
        '''Store the search space and create a fresh random number generator.'''
        self.random_state = np.random.RandomState()
        self.searchspace_json = search_space

    def generate_parameters(self, parameter_id):
        '''Return a random architecture drawn from the stored search space.

        ``parameter_id`` is accepted but not used.
        '''
        sampled_archi = random_archi_generator(self.searchspace_json, self.random_state)
        return sampled_archi

    def receive_trial_result(self, parameter_id, parameters, value):
        '''Ignore trial results; the arguments are not used.'''
        return None
# How to use ga_customer_tuner?
This tuner is a customized tuner which is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad".
This tuner is a customized tuner which is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad".
Type `cd ~/nni/examples/trials/ga_squad` and check readme.md to get more information about the ga_squad trial.
# config
# config
If you want to use ga_customer_tuner in your experiment, you can set up the config file in the following format:
```
......
......@@ -56,7 +56,8 @@ setup(
'scipy',
'schema',
'PythonWebHDFS',
'colorama'
'colorama',
'sklearn'
],
entry_points = {
......
......@@ -29,7 +29,7 @@ import { getBasePort } from './experimentStartupInfo';
/**
* Abstraction class to create a RestServer
* The module who wants to use a RestServer could <b>extends</b> this abstract class
* The module who wants to use a RestServer could <b>extends</b> this abstract class
* And implement its own registerRestHandler() function to register routers
*/
export abstract class RestServer {
......@@ -43,7 +43,7 @@ export abstract class RestServer {
protected app: express.Application = express();
protected log: Logger = getLogger();
protected basePort?: number;
constructor() {
this.port = getBasePort();
assert(this.port && this.port > 1024);
......@@ -91,9 +91,9 @@ export abstract class RestServer {
} else {
this.startTask.promise.then(
() => { // Started
//Stops the server from accepting new connections and keeps existing connections.
//This function is asynchronous, the server is finally closed when all connections
//are ended and the server emits a 'close' event.
//Stops the server from accepting new connections and keeps existing connections.
//This function is asynchronous, the server is finally closed when all connections
//are ended and the server emits a 'close' event.
//Refer https://nodejs.org/docs/latest/api/net.html#net_server_close_callback
this.server.close().on('close', () => {
this.log.info('Rest server stopped.');
......
......@@ -91,6 +91,7 @@ interface TrialJobMetric {
* define TrainingServiceError
*/
class TrainingServiceError extends Error {
private errCode: number;
constructor(errorCode: number, errorMessage: string) {
......@@ -136,5 +137,3 @@ export {
TrainingServiceMetadata, TrialJobDetail, TrialJobMetric, HyperParameters,
HostJobApplicationForm, JobApplicationForm, JobType, NNIManagerIpConfig
};
......@@ -167,7 +167,7 @@ function getCmdPy(): string {
}
/**
* Generate command line to start automl algorithm(s),
* Generate command line to start automl algorithm(s),
* either start advisor or start a process which runs tuner and assessor
* @param tuner : For builtin tuner:
* {
......@@ -361,11 +361,11 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
if(process.platform === "win32") {
cmd = `powershell "Get-ChildItem -Path ${directory} -Recurse -File | Measure-Object | %{$_.Count}"`
} else {
cmd = `find ${directory} -type f | wc -l`;
cmd = `find ${directory} -type f | wc -l`;
}
cpp.exec(cmd).then((result) => {
if(result.stdout && parseInt(result.stdout)) {
fileCount = parseInt(result.stdout);
fileCount = parseInt(result.stdout);
}
deferred.resolve(fileCount);
});
......@@ -374,6 +374,40 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
});
}
/**
 * Check that a name consists only of ASCII letters, digits, '.', '_' and '-'.
 * @param fileName name to validate
 * @returns true when the name is non-empty and every character is allowed
 */
function validateFileName(fileName: string): boolean {
    // '-' is placed last in the character class so it is a literal hyphen.
    // The previous class '[a-z0-9A-Z\.-_]' contained the range '.'..'_', which
    // accepted characters such as '/', ';', '<' and '>' and rejected '-'.
    const pattern: RegExp = /^[a-zA-Z0-9._-]+$/;

    return pattern.test(fileName);
}
/**
 * Validate the name of every entry under a directory, descending into sub-directories.
 * @param directory directory whose entries are checked with validateFileName()
 * @returns a promise resolved with true when every name is valid (also for an empty directory)
 * @throws (as a rejected promise) an Error when the directory does not exist, when a name
 *         is not valid, or when reading the file system fails
 */
async function validateFileNameRecursively(directory: string): Promise<boolean> {
    if (!fs.existsSync(directory)) {
        throw Error(`Directory ${directory} doesn't exist`);
    }
    // Inside an async function a thrown error already rejects the returned promise,
    // so no try/catch or Promise.reject() wrapping is needed.
    for (const name of fs.readdirSync(directory)) {
        const fullFilePath: string = path.join(directory, name);
        // validate file names and directory names
        let valid: boolean = validateFileName(name);
        // lstat does not follow symbolic links, so linked directories are not traversed.
        if (fs.lstatSync(fullFilePath).isDirectory()) {
            valid = valid && await validateFileNameRecursively(fullFilePath);
        }
        if (!valid) {
            throw new Error(`file name in ${fullFilePath} is not valid!`);
        }
    }

    return true;
}
/**
* get the version of current package
*/
......@@ -385,7 +419,7 @@ async function getVersion(): Promise<string> {
deferred.reject(error);
});
return deferred.promise;
}
}
/**
* run command as ChildProcess
......@@ -437,7 +471,7 @@ async function isAlive(pid:any): Promise<boolean> {
}
/**
* kill process
* kill process
*/
async function killPid(pid:any): Promise<void> {
let deferred : Deferred<void> = new Deferred<void>();
......@@ -466,7 +500,7 @@ function getNewLine(): string {
/**
* Use '/' to join path instead of '\' for all kinds of platform
* @param path
* @param path
*/
function unixPathJoin(...paths: any[]): string {
const dir: string = paths.filter((path: any) => path !== '').join('/');
......@@ -474,6 +508,6 @@ function unixPathJoin(...paths: any[]): string {
return dir;
}
export {countFilesRecursively, getRemoteTmpDir, generateParamFileName, getMsgDispatcherCommand, getCheckpointDir,
export {countFilesRecursively, validateFileNameRecursively, getRemoteTmpDir, generateParamFileName, getMsgDispatcherCommand, getCheckpointDir,
getLogDir, getExperimentRootDir, getJobCancelStatus, getDefaultDatabaseDir, getIPV4Address, unixPathJoin,
mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect, getLogLevel, getVersion, getCmdPy, getTunerProc, isAlive, killPid, getNewLine };
{
"kind": "CustomResourceDefinition",
"kind": "CustomResourceDefinition",
"spec": {
"scope": "Namespaced",
"version": "v1",
"group": "frameworkcontroller.microsoft.com",
"scope": "Namespaced",
"version": "v1",
"group": "frameworkcontroller.microsoft.com",
"names": {
"kind": "Framework",
"plural": "frameworks",
"kind": "Framework",
"plural": "frameworks",
"singular": "framework"
}
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
"metadata": {
"name": "frameworks.frameworkcontroller.microsoft.com"
}
......
{
"kind": "CustomResourceDefinition",
"kind": "CustomResourceDefinition",
"spec": {
"scope": "Namespaced",
"version": "v1alpha2",
"group": "kubeflow.org",
"scope": "Namespaced",
"version": "v1alpha2",
"group": "kubeflow.org",
"names": {
"kind": "PyTorchJob",
"plural": "pytorchjobs",
"kind": "PyTorchJob",
"plural": "pytorchjobs",
"singular": "pytorchjob"
}
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
"metadata": {
"name": "pytorchjobs.kubeflow.org"
}
......
{
"kind": "CustomResourceDefinition",
"kind": "CustomResourceDefinition",
"spec": {
"scope": "Namespaced",
"version": "v1beta1",
"group": "kubeflow.org",
"scope": "Namespaced",
"version": "v1beta1",
"group": "kubeflow.org",
"names": {
"kind": "PyTorchJob",
"plural": "pytorchjobs",
"kind": "PyTorchJob",
"plural": "pytorchjobs",
"singular": "pytorchjob"
}
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
"metadata": {
"name": "pytorchjobs.kubeflow.org"
}
......
{
"kind": "CustomResourceDefinition",
"kind": "CustomResourceDefinition",
"spec": {
"scope": "Namespaced",
"version": "v1alpha2",
"group": "kubeflow.org",
"scope": "Namespaced",
"version": "v1alpha2",
"group": "kubeflow.org",
"names": {
"kind": "TFJob",
"plural": "tfjobs",
"kind": "TFJob",
"plural": "tfjobs",
"singular": "tfjob"
}
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
"metadata": {
"name": "tfjobs.kubeflow.org"
}
......
{
"kind": "CustomResourceDefinition",
"kind": "CustomResourceDefinition",
"spec": {
"scope": "Namespaced",
"version": "v1beta1",
"group": "kubeflow.org",
"scope": "Namespaced",
"version": "v1beta1",
"group": "kubeflow.org",
"names": {
"kind": "TFJob",
"plural": "tfjobs",
"kind": "TFJob",
"plural": "tfjobs",
"singular": "tfjob"
}
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
},
"apiVersion": "apiextensions.k8s.io/v1beta1",
"metadata": {
"name": "tfjobs.kubeflow.org"
}
......
......@@ -159,7 +159,7 @@ class NNIManager implements Manager {
if (expParams.logCollection !== undefined) {
this.trainingService.setClusterMetadata('log_collection', expParams.logCollection.toString());
}
const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor, expParams.advisor,
expParams.multiPhase, expParams.multiThread);
this.log.debug(`dispatcher command: ${dispatcherCommand}`);
......@@ -493,7 +493,7 @@ class NNIManager implements Manager {
// If trialConcurrency does not change, requestTrialNum equals finishedTrialJobNum.
// If trialConcurrency changes, for example, trialConcurrency increases by 2 (trialConcurrencyChange=2), then
// requestTrialNum equals 2 + finishedTrialJobNum and trialConcurrencyChange becomes 0.
// If trialConcurrency changes, for example, trialConcurrency decreases by 4 (trialConcurrencyChange=-4) and
// If trialConcurrency changes, for example, trialConcurrency decreases by 4 (trialConcurrencyChange=-4) and
// finishedTrialJobNum is 2, then requestTrialNum becomes -2. No trial will be requested from tuner,
// and trialConcurrencyChange becomes -2.
const requestTrialNum: number = this.trialConcurrencyChange + finishedTrialJobNum;
......
......@@ -46,11 +46,11 @@ function runProcess(): Promise<Error | null> {
if (code !== 0) {
deferred.resolve(new Error(`return code: ${code}`));
} else {
let str = proc.stdout.read().toString();
let str = proc.stdout.read().toString();
if(str.search("\r\n")!=-1){
sentCommands = str.split("\r\n");
}
else{
else{
sentCommands = str.split('\n');
}
deferred.resolve(null);
......@@ -76,7 +76,7 @@ function runProcess(): Promise<Error | null> {
commandTooLong = error;
}
// Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
// Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
try {
dispatcher.sendCommand('FE', '1');
} catch (error) {
......
......@@ -59,10 +59,10 @@ class MockedTrainingService extends TrainingService {
},
sequenceId: 0
};
public listTrialJobs(): Promise<TrialJobDetail[]> {
const deferred = new Deferred<TrialJobDetail[]>();
deferred.resolve([this.jobDetail1, this.jobDetail2]);
return deferred.promise;
}
......
......@@ -104,7 +104,7 @@ describe('Unit test for nnimanager', function () {
maxSequenceId: 0,
revision: 0
}
before(async () => {
await initContainer();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment