Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
12410686
"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "af9d4eb038c9d8d6f86b043292134bba7ad66805"
Unverified
Commit
12410686
authored
Jun 21, 2019
by
chicm-ms
Committed by
GitHub
Jun 21, 2019
Browse files
Merge pull request #20 from microsoft/master
pull code
parents
611a45fc
61fec446
Changes
242
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
168 additions
and
76 deletions
+168
-76
examples/trials/sklearn/regression/main.py
examples/trials/sklearn/regression/main.py
+2
-2
examples/trials/weight_sharing/ga_squad/config_remote.yml
examples/trials/weight_sharing/ga_squad/config_remote.yml
+3
-3
examples/trials/weight_sharing/ga_squad/data.py
examples/trials/weight_sharing/ga_squad/data.py
+1
-3
examples/trials/weight_sharing/ga_squad/graph_to_tf.py
examples/trials/weight_sharing/ga_squad/graph_to_tf.py
+1
-1
examples/tuners/ga_customer_tuner/README.md
examples/tuners/ga_customer_tuner/README.md
+2
-2
examples/tuners/random_nas_tuner/random_nas_tuner.py
examples/tuners/random_nas_tuner/random_nas_tuner.py
+60
-0
examples/tuners/weight_sharing/ga_customer_tuner/README.md
examples/tuners/weight_sharing/ga_customer_tuner/README.md
+2
-2
setup.py
setup.py
+2
-1
src/nni_manager/common/restServer.ts
src/nni_manager/common/restServer.ts
+5
-5
src/nni_manager/common/trainingService.ts
src/nni_manager/common/trainingService.ts
+1
-2
src/nni_manager/common/utils.ts
src/nni_manager/common/utils.ts
+41
-7
src/nni_manager/config/frameworkcontroller/frameworkcontrollerjob-crd-v1.json
...ig/frameworkcontroller/frameworkcontrollerjob-crd-v1.json
+8
-8
src/nni_manager/config/kubeflow/pytorchjob-crd-v1alpha2.json
src/nni_manager/config/kubeflow/pytorchjob-crd-v1alpha2.json
+8
-8
src/nni_manager/config/kubeflow/pytorchjob-crd-v1beta1.json
src/nni_manager/config/kubeflow/pytorchjob-crd-v1beta1.json
+8
-8
src/nni_manager/config/kubeflow/tfjob-crd-v1alpha2.json
src/nni_manager/config/kubeflow/tfjob-crd-v1alpha2.json
+8
-8
src/nni_manager/config/kubeflow/tfjob-crd-v1beta1.json
src/nni_manager/config/kubeflow/tfjob-crd-v1beta1.json
+8
-8
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+2
-2
src/nni_manager/core/test/ipcInterface.test.ts
src/nni_manager/core/test/ipcInterface.test.ts
+3
-3
src/nni_manager/core/test/mockedTrainingService.ts
src/nni_manager/core/test/mockedTrainingService.ts
+2
-2
src/nni_manager/core/test/nnimanager.test.ts
src/nni_manager/core/test/nnimanager.test.ts
+1
-1
No files found.
examples/trials/sklearn/regression/main.py
View file @
12410686
...
...
@@ -63,9 +63,9 @@ def get_model(PARAMS):
if
not
model_dict
.
get
(
PARAMS
[
'model_name'
]):
LOG
.
exception
(
'Not supported model!'
)
exit
(
1
)
model
=
model_dict
[
PARAMS
[
'model_name'
]]
try
:
if
PARAMS
[
'model_name'
]
==
'SVR'
:
model
.
kernel
=
PARAMS
[
'svr_kernel'
]
...
...
examples/trials/weight_sharing/ga_squad/config_remote.yml
View file @
12410686
...
...
@@ -10,7 +10,7 @@ useAnnotation: false
multiThread
:
true
tuner
:
codeDir
:
../../../tuners/weight_sharing/ga_customer_tuner
classFileName
:
customer_tuner.py
classFileName
:
customer_tuner.py
className
:
CustomerTuner
classArgs
:
optimize_mode
:
maximize
...
...
@@ -23,9 +23,9 @@ trial:
machineList
:
-
ip
:
remote-ip-0
port
:
8022
username
:
root
username
:
root
passwd
:
screencast
-
ip
:
remote-ip-1
port
:
8022
username
:
root
username
:
root
passwd
:
screencast
examples/trials/weight_sharing/ga_squad/data.py
View file @
12410686
...
...
@@ -241,9 +241,7 @@ def get_id(word_dict, word):
'''
Given word, return word id.
'''
if
word
in
word_dict
.
keys
():
return
word_dict
[
word
]
return
word_dict
[
'<unk>'
]
return
word_dict
.
get
(
word
,
word_dict
[
'<unk>'
])
def
get_buckets
(
min_length
,
max_length
,
bucket_count
):
...
...
examples/trials/weight_sharing/ga_squad/graph_to_tf.py
View file @
12410686
...
...
@@ -290,7 +290,7 @@ def graph_to_network(input1,
if
topo_i
==
'|'
:
continue
# Note: here we use the `hash_id` of layer as scope name,
# Note: here we use the `hash_id` of layer as scope name,
# so that we can automatically load sharable weights from previous trained models
with
tf
.
variable_scope
(
p_graph
.
layers
[
topo_i
].
hash_id
,
reuse
=
tf
.
AUTO_REUSE
):
if
p_graph
.
layers
[
topo_i
].
graph_type
==
LayerType
.
input
.
value
:
...
...
examples/tuners/ga_customer_tuner/README.md
View file @
12410686
# How to use ga_customer_tuner?
This tuner is a customized tuner that is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad",
This tuner is a customized tuner that is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad",
type
`cd ~/nni/examples/trials/ga_squad`
and check readme.md to get more information for ga_squad trial.
# config
# config
If you want to use ga_customer_tuner in your experiment, you can set up the config file in the following format:
```
...
...
examples/tuners/random_nas_tuner/random_nas_tuner.py
0 → 100644
View file @
12410686
import
numpy
as
np
from
nni.tuner
import
Tuner
def random_archi_generator(nas_ss, random_state):
    '''Randomly sample one architecture from a NAS search space.

    Parameters
    ----------
    nas_ss : dict
        Mapping ``block name -> {layer name -> layer spec}``. A layer spec may
        contain the keys ``'layer_choice'`` (sequence of candidates),
        ``'optional_inputs'`` (sequence of candidate inputs) and
        ``'optional_input_size'`` (an int, or a ``[low, high]`` pair that is
        sampled inclusively). Any other key is rejected.
    random_state : numpy.random.RandomState
        Source of randomness; only its ``randint`` method is used.

    Returns
    -------
    dict
        ``block name -> {layer name -> chosen layer}`` where a chosen layer
        holds ``'chosen_layer'`` and/or ``'chosen_inputs'`` depending on which
        keys the layer spec provided.

    Raises
    ------
    ValueError
        If a layer spec contains an unknown key.
    KeyError
        If a layer has non-empty ``'optional_inputs'`` but no
        ``'optional_input_size'``.
    '''
    chosen_archi = {}
    for block_name, block in nas_ss.items():
        tmp_block = {}
        for layer_name, layer in block.items():
            tmp_layer = {}
            for key, value in layer.items():
                if key == 'layer_choice':
                    index = random_state.randint(len(value))
                    tmp_layer['chosen_layer'] = value[index]
                elif key == 'optional_inputs':
                    chosen_inputs = []
                    tmp_layer['chosen_inputs'] = chosen_inputs
                    # With no candidate inputs there is nothing to sample and
                    # 'optional_input_size' is not consulted at all.
                    if value:
                        input_size = layer['optional_input_size']
                        if isinstance(input_size, int):
                            choice_num = input_size
                        else:
                            # randint's upper bound is exclusive; +1 makes the
                            # configured [low, high] range inclusive.
                            choice_num = random_state.randint(input_size[0], input_size[1] + 1)
                        # Inputs are drawn independently, so the same input
                        # may be chosen more than once.
                        for _ in range(choice_num):
                            index = random_state.randint(len(value))
                            chosen_inputs.append(value[index])
                elif key == 'optional_input_size':
                    # Consumed while handling 'optional_inputs'.
                    pass
                else:
                    raise ValueError('Unknown field %s in layer %s of block %s' % (key, layer_name, block_name))
            tmp_block[layer_name] = tmp_layer
        chosen_archi[block_name] = tmp_block
    return chosen_archi
class RandomNASTuner(Tuner):
    '''Tuner that proposes architectures sampled uniformly at random.

    It keeps no trial history: each call to ``generate_parameters`` draws a
    fresh architecture from the most recently supplied search space.
    '''

    def __init__(self):
        # Both fields stay unset until update_search_space() is called.
        self.searchspace_json = self.random_state = None

    def update_search_space(self, search_space):
        '''Store the search space and create a new random number generator.'''
        self.random_state = np.random.RandomState()
        self.searchspace_json = search_space

    def generate_parameters(self, parameter_id):
        '''Return one randomly sampled architecture; ``parameter_id`` is not used.'''
        search_space, rng = self.searchspace_json, self.random_state
        return random_archi_generator(search_space, rng)

    def receive_trial_result(self, parameter_id, parameters, value):
        '''Ignore the reported result; random search does not learn from trials.'''
        return None
examples/tuners/weight_sharing/ga_customer_tuner/README.md
View file @
12410686
# How to use ga_customer_tuner?
This tuner is a customized tuner that is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad",
This tuner is a customized tuner that is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad",
type
`cd ~/nni/examples/trials/ga_squad`
and check readme.md to get more information for ga_squad trial.
# config
# config
If you want to use ga_customer_tuner in your experiment, you can set up the config file in the following format:
```
...
...
setup.py
View file @
12410686
...
...
@@ -56,7 +56,8 @@ setup(
'scipy'
,
'schema'
,
'PythonWebHDFS'
,
'colorama'
'colorama'
,
'sklearn'
],
entry_points
=
{
...
...
src/nni_manager/common/restServer.ts
View file @
12410686
...
...
@@ -29,7 +29,7 @@ import { getBasePort } from './experimentStartupInfo';
/**
* Abstraction class to create a RestServer
* The module who wants to use a RestServer could <b>extends</b> this abstract class
* The module who wants to use a RestServer could <b>extends</b> this abstract class
* And implement its own registerRestHandler() function to register routers
*/
export
abstract
class
RestServer
{
...
...
@@ -43,7 +43,7 @@ export abstract class RestServer {
protected
app
:
express
.
Application
=
express
();
protected
log
:
Logger
=
getLogger
();
protected
basePort
?:
number
;
constructor
()
{
this
.
port
=
getBasePort
();
assert
(
this
.
port
&&
this
.
port
>
1024
);
...
...
@@ -91,9 +91,9 @@ export abstract class RestServer {
}
else
{
this
.
startTask
.
promise
.
then
(
()
=>
{
// Started
//Stops the server from accepting new connections and keeps existing connections.
//This function is asynchronous, the server is finally closed when all connections
//are ended and the server emits a 'close' event.
//Stops the server from accepting new connections and keeps existing connections.
//This function is asynchronous, the server is finally closed when all connections
//are ended and the server emits a 'close' event.
//Refer https://nodejs.org/docs/latest/api/net.html#net_server_close_callback
this
.
server
.
close
().
on
(
'
close
'
,
()
=>
{
this
.
log
.
info
(
'
Rest server stopped.
'
);
...
...
src/nni_manager/common/trainingService.ts
View file @
12410686
...
...
@@ -91,6 +91,7 @@ interface TrialJobMetric {
* define TrainingServiceError
*/
class
TrainingServiceError
extends
Error
{
private
errCode
:
number
;
constructor
(
errorCode
:
number
,
errorMessage
:
string
)
{
...
...
@@ -136,5 +137,3 @@ export {
TrainingServiceMetadata
,
TrialJobDetail
,
TrialJobMetric
,
HyperParameters
,
HostJobApplicationForm
,
JobApplicationForm
,
JobType
,
NNIManagerIpConfig
};
src/nni_manager/common/utils.ts
View file @
12410686
...
...
@@ -167,7 +167,7 @@ function getCmdPy(): string {
}
/**
* Generate command line to start automl algorithm(s),
* Generate command line to start automl algorithm(s),
* either start advisor or start a process which runs tuner and assessor
* @param tuner : For builtin tuner:
* {
...
...
@@ -361,11 +361,11 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
if
(
process
.
platform
===
"
win32
"
)
{
cmd
=
`powershell "Get-ChildItem -Path
${
directory
}
-Recurse -File | Measure-Object | %{$_.Count}"`
}
else
{
cmd
=
`find
${
directory
}
-type f | wc -l`
;
cmd
=
`find
${
directory
}
-type f | wc -l`
;
}
cpp
.
exec
(
cmd
).
then
((
result
)
=>
{
if
(
result
.
stdout
&&
parseInt
(
result
.
stdout
))
{
fileCount
=
parseInt
(
result
.
stdout
);
fileCount
=
parseInt
(
result
.
stdout
);
}
deferred
.
resolve
(
fileCount
);
});
...
...
@@ -374,6 +374,40 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
});
}
/**
 * Check that a file or directory name consists only of ASCII letters,
 * digits, '.', '_' and '-'.
 *
 * @param fileName a single path component (not a full path)
 * @returns true when the whole name matches; false otherwise, including for ''
 */
function validateFileName(fileName: string): boolean {
    // The hyphen is placed last in the character class so it is a literal '-'.
    // The previous pattern '[a-z0-9A-Z\.-_]' turned '.-_' into a range
    // (0x2E-0x5F), which accepted characters such as '/', ':' and '\' while
    // rejecting '-' itself.
    const pattern: RegExp = /^[a-zA-Z0-9._-]+$/;

    return pattern.test(fileName);
}
/**
 * Recursively validate the name of every file and sub-directory under `directory`.
 *
 * @param directory root directory to scan; its own name is not validated
 * @returns a promise resolving to true when every entry name passes validateFileName
 * @throws the returned promise rejects with an Error when the directory does not
 *         exist, when an entry name is invalid, or when a filesystem call fails
 */
async function validateFileNameRecursively(directory: string): Promise<boolean> {
    if (!fs.existsSync(directory)) {
        throw new Error(`Directory ${directory} doesn't exist`);
    }
    const fileNameArray: string[] = fs.readdirSync(directory);
    for (const name of fileNameArray) {
        const fullFilePath: string = path.join(directory, name);
        // validate file names and directory names
        if (!validateFileName(name)) {
            throw new Error(`file name in ${fullFilePath} is not valid!`);
        }
        // lstat does not follow symbolic links, so a link that points at a
        // directory is validated by name only and is not descended into.
        if (fs.lstatSync(fullFilePath).isDirectory()) {
            // Rejects on the first invalid name found below; otherwise resolves to true.
            await validateFileNameRecursively(fullFilePath);
        }
    }

    return true;
}
/**
* get the version of current package
*/
...
...
@@ -385,7 +419,7 @@ async function getVersion(): Promise<string> {
deferred
.
reject
(
error
);
});
return
deferred
.
promise
;
}
}
/**
* run command as ChildProcess
...
...
@@ -437,7 +471,7 @@ async function isAlive(pid:any): Promise<boolean> {
}
/**
* kill process
* kill process
*/
async
function
killPid
(
pid
:
any
):
Promise
<
void
>
{
let
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
...
...
@@ -466,7 +500,7 @@ function getNewLine(): string {
/**
* Use '/' to join path instead of '\' for all kinds of platform
* @param path
* @param path
*/
function
unixPathJoin
(...
paths
:
any
[]):
string
{
const
dir
:
string
=
paths
.
filter
((
path
:
any
)
=>
path
!==
''
).
join
(
'
/
'
);
...
...
@@ -474,6 +508,6 @@ function unixPathJoin(...paths: any[]): string {
return
dir
;
}
export
{
countFilesRecursively
,
getRemoteTmpDir
,
generateParamFileName
,
getMsgDispatcherCommand
,
getCheckpointDir
,
export
{
countFilesRecursively
,
validateFileNameRecursively
,
getRemoteTmpDir
,
generateParamFileName
,
getMsgDispatcherCommand
,
getCheckpointDir
,
getLogDir
,
getExperimentRootDir
,
getJobCancelStatus
,
getDefaultDatabaseDir
,
getIPV4Address
,
unixPathJoin
,
mkDirP
,
delay
,
prepareUnitTest
,
parseArg
,
cleanupUnitTest
,
uniqueString
,
randomSelect
,
getLogLevel
,
getVersion
,
getCmdPy
,
getTunerProc
,
isAlive
,
killPid
,
getNewLine
};
src/nni_manager/config/frameworkcontroller/frameworkcontrollerjob-crd-v1.json
View file @
12410686
{
"kind"
:
"CustomResourceDefinition"
,
"kind"
:
"CustomResourceDefinition"
,
"spec"
:
{
"scope"
:
"Namespaced"
,
"version"
:
"v1"
,
"group"
:
"frameworkcontroller.microsoft.com"
,
"scope"
:
"Namespaced"
,
"version"
:
"v1"
,
"group"
:
"frameworkcontroller.microsoft.com"
,
"names"
:
{
"kind"
:
"Framework"
,
"plural"
:
"frameworks"
,
"kind"
:
"Framework"
,
"plural"
:
"frameworks"
,
"singular"
:
"framework"
}
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
"metadata"
:
{
"name"
:
"frameworks.frameworkcontroller.microsoft.com"
}
...
...
src/nni_manager/config/kubeflow/pytorchjob-crd-v1alpha2.json
View file @
12410686
{
"kind"
:
"CustomResourceDefinition"
,
"kind"
:
"CustomResourceDefinition"
,
"spec"
:
{
"scope"
:
"Namespaced"
,
"version"
:
"v1alpha2"
,
"group"
:
"kubeflow.org"
,
"scope"
:
"Namespaced"
,
"version"
:
"v1alpha2"
,
"group"
:
"kubeflow.org"
,
"names"
:
{
"kind"
:
"PyTorchJob"
,
"plural"
:
"pytorchjobs"
,
"kind"
:
"PyTorchJob"
,
"plural"
:
"pytorchjobs"
,
"singular"
:
"pytorchjob"
}
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
"metadata"
:
{
"name"
:
"pytorchjobs.kubeflow.org"
}
...
...
src/nni_manager/config/kubeflow/pytorchjob-crd-v1beta1.json
View file @
12410686
{
"kind"
:
"CustomResourceDefinition"
,
"kind"
:
"CustomResourceDefinition"
,
"spec"
:
{
"scope"
:
"Namespaced"
,
"version"
:
"v1beta1"
,
"group"
:
"kubeflow.org"
,
"scope"
:
"Namespaced"
,
"version"
:
"v1beta1"
,
"group"
:
"kubeflow.org"
,
"names"
:
{
"kind"
:
"PyTorchJob"
,
"plural"
:
"pytorchjobs"
,
"kind"
:
"PyTorchJob"
,
"plural"
:
"pytorchjobs"
,
"singular"
:
"pytorchjob"
}
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
"metadata"
:
{
"name"
:
"pytorchjobs.kubeflow.org"
}
...
...
src/nni_manager/config/kubeflow/tfjob-crd-v1alpha2.json
View file @
12410686
{
"kind"
:
"CustomResourceDefinition"
,
"kind"
:
"CustomResourceDefinition"
,
"spec"
:
{
"scope"
:
"Namespaced"
,
"version"
:
"v1alpha2"
,
"group"
:
"kubeflow.org"
,
"scope"
:
"Namespaced"
,
"version"
:
"v1alpha2"
,
"group"
:
"kubeflow.org"
,
"names"
:
{
"kind"
:
"TFJob"
,
"plural"
:
"tfjobs"
,
"kind"
:
"TFJob"
,
"plural"
:
"tfjobs"
,
"singular"
:
"tfjob"
}
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
"metadata"
:
{
"name"
:
"tfjobs.kubeflow.org"
}
...
...
src/nni_manager/config/kubeflow/tfjob-crd-v1beta1.json
View file @
12410686
{
"kind"
:
"CustomResourceDefinition"
,
"kind"
:
"CustomResourceDefinition"
,
"spec"
:
{
"scope"
:
"Namespaced"
,
"version"
:
"v1beta1"
,
"group"
:
"kubeflow.org"
,
"scope"
:
"Namespaced"
,
"version"
:
"v1beta1"
,
"group"
:
"kubeflow.org"
,
"names"
:
{
"kind"
:
"TFJob"
,
"plural"
:
"tfjobs"
,
"kind"
:
"TFJob"
,
"plural"
:
"tfjobs"
,
"singular"
:
"tfjob"
}
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
},
"apiVersion"
:
"apiextensions.k8s.io/v1beta1"
,
"metadata"
:
{
"name"
:
"tfjobs.kubeflow.org"
}
...
...
src/nni_manager/core/nnimanager.ts
View file @
12410686
...
...
@@ -159,7 +159,7 @@ class NNIManager implements Manager {
if
(
expParams
.
logCollection
!==
undefined
)
{
this
.
trainingService
.
setClusterMetadata
(
'
log_collection
'
,
expParams
.
logCollection
.
toString
());
}
const
dispatcherCommand
:
string
=
getMsgDispatcherCommand
(
expParams
.
tuner
,
expParams
.
assessor
,
expParams
.
advisor
,
expParams
.
multiPhase
,
expParams
.
multiThread
);
this
.
log
.
debug
(
`dispatcher command:
${
dispatcherCommand
}
`
);
...
...
@@ -493,7 +493,7 @@ class NNIManager implements Manager {
// If trialConcurrency does not change, requestTrialNum equals finishedTrialJobNum.
// If trialConcurrency changes, for example, trialConcurrency increases by 2 (trialConcurrencyChange=2), then
// requestTrialNum equals 2 + finishedTrialJobNum and trialConcurrencyChange becomes 0.
// If trialConcurrency changes, for example, trialConcurrency decreases by 4 (trialConcurrencyChange=-4) and
// If trialConcurrency changes, for example, trialConcurrency decreases by 4 (trialConcurrencyChange=-4) and
// finishedTrialJobNum is 2, then requestTrialNum becomes -2. No trial will be requested from tuner,
// and trialConcurrencyChange becomes -2.
const
requestTrialNum
:
number
=
this
.
trialConcurrencyChange
+
finishedTrialJobNum
;
...
...
src/nni_manager/core/test/ipcInterface.test.ts
View file @
12410686
...
...
@@ -46,11 +46,11 @@ function runProcess(): Promise<Error | null> {
if
(
code
!==
0
)
{
deferred
.
resolve
(
new
Error
(
`return code:
${
code
}
`
));
}
else
{
let
str
=
proc
.
stdout
.
read
().
toString
();
let
str
=
proc
.
stdout
.
read
().
toString
();
if
(
str
.
search
(
"
\r\n
"
)
!=-
1
){
sentCommands
=
str
.
split
(
"
\r\n
"
);
}
else
{
else
{
sentCommands
=
str
.
split
(
'
\n
'
);
}
deferred
.
resolve
(
null
);
...
...
@@ -76,7 +76,7 @@ function runProcess(): Promise<Error | null> {
commandTooLong
=
error
;
}
// Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
// Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
try
{
dispatcher
.
sendCommand
(
'
FE
'
,
'
1
'
);
}
catch
(
error
)
{
...
...
src/nni_manager/core/test/mockedTrainingService.ts
View file @
12410686
...
...
@@ -59,10 +59,10 @@ class MockedTrainingService extends TrainingService {
},
sequenceId
:
0
};
/**
 * Mocked job listing: resolves with this instance's jobDetail1 and jobDetail2.
 */
public listTrialJobs(): Promise<TrialJobDetail[]> {
    const jobs: TrialJobDetail[] = [this.jobDetail1, this.jobDetail2];

    return Promise.resolve(jobs);
}
...
...
src/nni_manager/core/test/nnimanager.test.ts
View file @
12410686
...
...
@@ -104,7 +104,7 @@ describe('Unit test for nnimanager', function () {
maxSequenceId
:
0
,
revision
:
0
}
before
(
async
()
=>
{
await
initContainer
();
...
...
Prev
1
2
3
4
5
6
7
8
9
10
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment