Unverified Commit 7d1acfbd authored by Zhenhua Han's avatar Zhenhua Han Committed by GitHub
Browse files

[Retiarii] cross-graph optimization: input deduplication (#3105)

parent 165756cc
...@@ -3,8 +3,13 @@ Entrypoint for trials. ...@@ -3,8 +3,13 @@ Entrypoint for trials.
Assuming execution engine is BaseExecutionEngine. Assuming execution engine is BaseExecutionEngine.
""" """
import os
from .execution.base import BaseExecutionEngine from .execution.base import BaseExecutionEngine
from .execution.cgo_engine import CGOExecutionEngine
if __name__ == '__main__': if __name__ == '__main__':
BaseExecutionEngine.trial_execute_graph() if os.environ.get('CGO') == 'true':
CGOExecutionEngine.trial_execute_graph()
else:
BaseExecutionEngine.trial_execute_graph()
...@@ -7,3 +7,4 @@ assessor_result.txt ...@@ -7,3 +7,4 @@ assessor_result.txt
_generated_model.py _generated_model.py
data data
generated generated
{
"_model__stem":{
"inputs":[
"_inputs__1"
],
"outputs":[
"pool2__1"
],
"nodes":{
"_model__stem__conv1":{
"operation":{
"type":"__torch__.torch.nn.modules.conv.Conv2d",
"parameters":{
"out_channels":32,
"in_channels":1,
"kernel_size":5
}
}
},
"_model__stem__pool1":{
"operation":{
"type":"__torch__.torch.nn.modules.pooling.MaxPool2d",
"parameters":{
"kernel_size":2
}
}
},
"_model__stem__conv2":{
"operation":{
"type":"__torch__.torch.nn.modules.conv.Conv2d",
"parameters":{
"out_channels":64,
"in_channels":32,
"kernel_size":5
}
}
},
"_model__stem__pool2":{
"operation":{
"type":"__torch__.torch.nn.modules.pooling.MaxPool2d",
"parameters":{
"kernel_size":2
}
}
}
},
"edges":[
{
"head":[
"_inputs",
0
],
"tail":[
"_model__stem__conv1",
0
]
},
{
"head":[
"_model__stem__conv1",
null
],
"tail":[
"_model__stem__pool1",
0
]
},
{
"head":[
"_model__stem__pool1",
null
],
"tail":[
"_model__stem__conv2",
0
]
},
{
"head":[
"_model__stem__conv2",
null
],
"tail":[
"_model__stem__pool2",
0
]
},
{
"head":[
"_model__stem__pool2",
null
],
"tail":[
"_outputs",
null
]
}
]
},
"_model":{
"inputs":[
"image__1"
],
"outputs":[
"softmax__1"
],
"nodes":{
"_model__Constant2":{
"operation":{
"type":"prim::Constant",
"parameters":{
}
}
},
"_model__Constant3":{
"operation":{
"type":"prim::Constant",
"parameters":{
"value":3
}
}
},
"_model__Constant4":{
"operation":{
"type":"prim::Constant",
"parameters":{
"value":-1
}
}
},
"_model__Constant5":{
"operation":{
"type":"prim::Constant",
"parameters":{
"value":0
}
}
},
"_model__stem":{
"operation":{
"type":"_cell",
"parameters":{
},
"cell_name":"_model__stem"
}
},
"_model__Size6":{
"operation":{
"type":"aten::size",
"parameters":{
}
}
},
"_model__ListConstruct7":{
"operation":{
"type":"prim::ListConstruct",
"parameters":{
}
}
},
"_model__View8":{
"operation":{
"type":"aten::view",
"parameters":{
}
}
},
"_model__fc1":{
"operation":{
"type":"__torch__.torch.nn.modules.linear.Linear",
"parameters":{
"in_features":1024,
"out_features":256
}
}
},
"_model__fc2":{
"operation":{
"type":"__torch__.torch.nn.modules.linear.Linear",
"parameters":{
"in_features":256,
"out_features":10
}
}
},
"_model__softmax9":{
"operation":{
"type":"Function.softmax",
"parameters":{
}
}
}
},
"edges":[
{
"head":[
"_inputs",
0
],
"tail":[
"_model__stem",
0
]
},
{
"head":[
"_model__stem",
null
],
"tail":[
"_model__Size6",
0
]
},
{
"head":[
"_model__Constant5",
null
],
"tail":[
"_model__Size6",
1
]
},
{
"head":[
"_model__Size6",
null
],
"tail":[
"_model__ListConstruct7",
0
]
},
{
"head":[
"_model__Constant4",
null
],
"tail":[
"_model__ListConstruct7",
1
]
},
{
"head":[
"_model__stem",
null
],
"tail":[
"_model__View8",
0
]
},
{
"head":[
"_model__ListConstruct7",
null
],
"tail":[
"_model__View8",
1
]
},
{
"head":[
"_model__View8",
null
],
"tail":[
"_model__fc1",
0
]
},
{
"head":[
"_model__fc1",
null
],
"tail":[
"_model__fc2",
0
]
},
{
"head":[
"_model__fc2",
null
],
"tail":[
"_model__softmax9",
0
]
},
{
"head":[
"_model__Constant4",
null
],
"tail":[
"_model__softmax9",
1
]
},
{
"head":[
"_model__Constant3",
null
],
"tail":[
"_model__softmax9",
2
]
},
{
"head":[
"_model__Constant2",
null
],
"tail":[
"_model__softmax9",
3
]
},
{
"head":[
"_model__softmax9",
null
],
"tail":[
"_outputs",
null
]
}
]
},
"_training_config": {
"module": "nni.retiarii.trainer.PyTorchImageClassificationTrainer",
"kwargs": {
"dataset_cls": "MNIST",
"dataset_kwargs": {
"root": "data/mnist",
"download": true
},
"dataloader_kwargs": {
"batch_size": 32
},
"optimizer_cls" : "SGD",
"optimizer_kwargs": {
"lr": 1e-3
},
"trainer_kwargs": {
"max_epochs": 1
}
}
}
}
\ No newline at end of file
import json import json
import os import os
import logging
from nni.retiarii import Model, submit_models, wait_models from nni.retiarii import Model, submit_models, wait_models
def single_model_startegy():
with open(os.path.join(os.path.dirname(__file__), 'mnist_pytorch.json')) as f: def single_model_strategy():
with open(os.path.join(os.path.dirname(__file__), 'converted_mnist_pytorch.json')) as f:
ir = json.load(f) ir = json.load(f)
model = Model._load(ir) model = Model._load(ir)
submit_models(model) submit_models(model)
wait_models(model) wait_models(model)
print('Strategy says:', model.metric) print('Strategy says:', model.metric)
def multi_model_cgo():
    """Submit four copies of the converted MNIST model as one batch and print their metrics.

    Sets the ``CGO`` environment variable to ``'true'`` before doing anything
    else, loads the graph IR from ``converted_mnist_pytorch.json`` located next
    to this file, forks the loaded model three times, then submits the original
    plus the forks together and waits for all of them.
    """
    os.environ['CGO'] = 'true'
    ir_path = os.path.join(os.path.dirname(__file__), 'converted_mnist_pytorch.json')
    with open(ir_path) as ir_file:
        ir = json.load(ir_file)
    base_model = Model._load(ir)
    # The loaded model comes first, followed by its three forks.
    models = [base_model] + [base_model.fork() for _ in range(3)]
    submit_models(*models)
    wait_models(*models)
    print('Strategy says:', [model.metric for model in models])
if __name__ == '__main__': if __name__ == '__main__':
single_model_startegy() single_model_strategy()
{"inputs": null, "outputs": null, "nodes": {"2__outputs": {"operation": {"type": "_outputs", "parameters": {}}}, "2__model__Constant2": {"operation": {"type": "prim::Constant", "parameters": {}}}, "2__model__Constant3": {"operation": {"type": "prim::Constant", "parameters": {"value": 3}}}, "2__model__Constant4": {"operation": {"type": "prim::Constant", "parameters": {"value": -1}}}, "2__model__Constant5": {"operation": {"type": "prim::Constant", "parameters": {"value": 0}}}, "2__model__stem": {"operation": {"type": "_cell", "parameters": {}, "cell_name": "_model__stem"}}, "2__model__Size6": {"operation": {"type": "aten::size", "parameters": {}}}, "2__model__ListConstruct7": {"operation": {"type": "prim::ListConstruct", "parameters": {}}}, "2__model__View8": {"operation": {"type": "aten::view", "parameters": {}}}, "2__model__fc1": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 1024, "out_features": 256}}}, "2__model__fc2": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 256, "out_features": 10}}}, "2__model__softmax9": {"operation": {"type": "Function.softmax", "parameters": {}}}, "3__outputs": {"operation": {"type": "_outputs", "parameters": {}}}, "3__model__Constant2": {"operation": {"type": "prim::Constant", "parameters": {}}}, "3__model__Constant3": {"operation": {"type": "prim::Constant", "parameters": {"value": 3}}}, "3__model__Constant4": {"operation": {"type": "prim::Constant", "parameters": {"value": -1}}}, "3__model__Constant5": {"operation": {"type": "prim::Constant", "parameters": {"value": 0}}}, "3__model__stem": {"operation": {"type": "_cell", "parameters": {}, "cell_name": "_model__stem"}}, "3__model__Size6": {"operation": {"type": "aten::size", "parameters": {}}}, "3__model__ListConstruct7": {"operation": {"type": "prim::ListConstruct", "parameters": {}}}, "3__model__View8": {"operation": {"type": "aten::view", "parameters": {}}}, "3__model__fc1": 
{"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 1024, "out_features": 256}}}, "3__model__fc2": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 256, "out_features": 10}}}, "3__model__softmax9": {"operation": {"type": "Function.softmax", "parameters": {}}}, "4__outputs": {"operation": {"type": "_outputs", "parameters": {}}}, "4__model__Constant2": {"operation": {"type": "prim::Constant", "parameters": {}}}, "4__model__Constant3": {"operation": {"type": "prim::Constant", "parameters": {"value": 3}}}, "4__model__Constant4": {"operation": {"type": "prim::Constant", "parameters": {"value": -1}}}, "4__model__Constant5": {"operation": {"type": "prim::Constant", "parameters": {"value": 0}}}, "4__model__stem": {"operation": {"type": "_cell", "parameters": {}, "cell_name": "_model__stem"}}, "4__model__Size6": {"operation": {"type": "aten::size", "parameters": {}}}, "4__model__ListConstruct7": {"operation": {"type": "prim::ListConstruct", "parameters": {}}}, "4__model__View8": {"operation": {"type": "aten::view", "parameters": {}}}, "4__model__fc1": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 1024, "out_features": 256}}}, "4__model__fc2": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 256, "out_features": 10}}}, "4__model__softmax9": {"operation": {"type": "Function.softmax", "parameters": {}}}, "1_Dedup__inputs": {"operation": {"type": "_inputs", "parameters": {}}}}, "edges": [["Dedup__inputs", "2__model__stem"], ["2__model__stem", "2__model__Size6"], ["2__model__Constant5", "2__model__Size6"], ["2__model__Size6", "2__model__ListConstruct7"], ["2__model__Constant4", "2__model__ListConstruct7"], ["2__model__stem", "2__model__View8"], ["2__model__ListConstruct7", "2__model__View8"], ["2__model__View8", "2__model__fc1"], ["2__model__fc1", "2__model__fc2"], ["2__model__fc2", 
"2__model__softmax9"], ["2__model__Constant4", "2__model__softmax9"], ["2__model__Constant3", "2__model__softmax9"], ["2__model__Constant2", "2__model__softmax9"], ["2__model__softmax9", "2__outputs"], ["Dedup__inputs", "3__model__stem"], ["3__model__stem", "3__model__Size6"], ["3__model__Constant5", "3__model__Size6"], ["3__model__Size6", "3__model__ListConstruct7"], ["3__model__Constant4", "3__model__ListConstruct7"], ["3__model__stem", "3__model__View8"], ["3__model__ListConstruct7", "3__model__View8"], ["3__model__View8", "3__model__fc1"], ["3__model__fc1", "3__model__fc2"], ["3__model__fc2", "3__model__softmax9"], ["3__model__Constant4", "3__model__softmax9"], ["3__model__Constant3", "3__model__softmax9"], ["3__model__Constant2", "3__model__softmax9"], ["3__model__softmax9", "3__outputs"], ["Dedup__inputs", "4__model__stem"], ["4__model__stem", "4__model__Size6"], ["4__model__Constant5", "4__model__Size6"], ["4__model__Size6", "4__model__ListConstruct7"], ["4__model__Constant4", "4__model__ListConstruct7"], ["4__model__stem", "4__model__View8"], ["4__model__ListConstruct7", "4__model__View8"], ["4__model__View8", "4__model__fc1"], ["4__model__fc1", "4__model__fc2"], ["4__model__fc2", "4__model__softmax9"], ["4__model__Constant4", "4__model__softmax9"], ["4__model__Constant3", "4__model__softmax9"], ["4__model__Constant2", "4__model__softmax9"], ["4__model__softmax9", "4__outputs"]]}
\ No newline at end of file
...@@ -23,10 +23,10 @@ ...@@ -23,10 +23,10 @@
"stem": { "stem": {
"nodes": { "nodes": {
"conv1": {"operation": {"type": "Conv2d", "parameters": {"out_channels": 32, "in_channels": 1, "kernel_size": 5}}}, "conv1": {"operation": {"type": "__torch__.Conv2d", "parameters": {"out_channels": 32, "in_channels": 1, "kernel_size": 5}}},
"pool1": {"operation": {"type": "MaxPool2d", "parameters": {"kernel_size": 2}}}, "pool1": {"operation": {"type": "__torch__.MaxPool2d", "parameters": {"kernel_size": 2}}},
"conv2": {"operation": {"type": "Conv2d", "parameters": {"out_channels": 64, "in_channels": 32, "kernel_size": 5}}}, "conv2": {"operation": {"type": "__torch__.Conv2d", "parameters": {"out_channels": 64, "in_channels": 32, "kernel_size": 5}}},
"pool2": {"operation": {"type": "MaxPool2d", "parameters": {"kernel_size": 2}}} "pool2": {"operation": {"type": "__torch__.MaxPool2d", "parameters": {"kernel_size": 2}}}
}, },
"edges": [ "edges": [
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
"_training_config": { "_training_config": {
"module": "nni.retiarii.trainer.PyTorchImageClassificationTrainer", "module": "nni.retiarii.trainer.PyTorchImageClassificationTrainer",
"kwargs": { "kwargs": {
"dataset_cls": "MNIST",
"dataset_kwargs": { "dataset_kwargs": {
"root": "data/mnist", "root": "data/mnist",
"download": true "download": true
...@@ -48,6 +49,7 @@ ...@@ -48,6 +49,7 @@
"dataloader_kwargs": { "dataloader_kwargs": {
"batch_size": 32 "batch_size": 32
}, },
"optimizer_cls" : "SGD",
"optimizer_kwargs": { "optimizer_kwargs": {
"lr": 1e-3 "lr": 1e-3
}, },
......
...@@ -11,7 +11,7 @@ advisor: ...@@ -11,7 +11,7 @@ advisor:
classFileName: advisor_entry.py classFileName: advisor_entry.py
className: RetiariiAdvisor className: RetiariiAdvisor
classArgs: classArgs:
strategy: debug_strategy.single_model_startegy strategy: debug_strategy.single_model_strategy
trial: trial:
command: python3 -m nni.retiarii.trial_entry command: python3 -m nni.retiarii.trial_entry
codeDir: ../.. codeDir: ../..
......
authorName: nni
experimentName: naive
trialConcurrency: 4
maxExecDuration: 1h
maxTrialNum: 10
trainingServicePlatform: local
searchSpacePath: fake_search_space.json
useAnnotation: false
advisor:
codeDir: .
classFileName: advisor_entry.py
className: RetiariiAdvisor
classArgs:
strategy: debug_strategy.multi_model_cgo
trial:
command: CGO=true python -m nni.retiarii.trial_entry
codeDir: ../..
gpuNum: 4
import json
import os
import sys
import threading
import unittest
import logging
import time
import torch
from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii import Model, Node
from nni.retiarii import Model, submit_models
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii.integration import RetiariiAdvisor
from nni.retiarii.trainer import PyTorchImageClassificationTrainer, PyTorchMultiModelTrainer
from nni.retiarii.utils import import_
def _load_mnist(n_models: int = 1):
    """Load the converted MNIST model IR and optionally replicate it.

    The IR is read from ``converted_mnist_pytorch.json`` in the current
    working directory.

    Returns the loaded ``Model`` itself when ``n_models`` is exactly 1.
    For any other value, returns a list whose first element is the loaded
    model, followed by ``n_models - 1`` forks of it.
    """
    with open('converted_mnist_pytorch.json') as ir_file:
        base_model = Model._load(json.load(ir_file))
    if n_models != 1:
        return [base_model] + [base_model.fork() for _ in range(n_models - 1)]
    return base_model
class CGOEngineTest(unittest.TestCase):
    """Smoke test that submits models through the CGO execution engine."""

    def test_submit_models(self):
        """Submit two MNIST models and, if at least two CUDA devices exist, run the trial.

        The trial part is skipped on machines without two visible GPUs; the
        submission itself and the advisor shutdown always run.
        """
        os.environ['CGO'] = 'true'
        os.makedirs('generated', exist_ok=True)
        from nni.runtime import protocol, platform
        # Both protocol ends point at the same scratch file -- presumably so the
        # command written by submit_models can be read back with
        # protocol.receive() below. Close the handles when the test finishes.
        protocol._out_file = open('generated/debug_protocol_out_file.py', 'wb')
        self.addCleanup(protocol._out_file.close)
        protocol._in_file = open('generated/debug_protocol_out_file.py', 'rb')
        self.addCleanup(protocol._in_file.close)

        models = _load_mnist(2)
        anything = lambda: None
        advisor = RetiariiAdvisor(anything)
        try:
            submit_models(*models)

            if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
                _, data = protocol.receive()
                params = json.loads(data)
                # Cap the training length so the test stays short.
                params['parameters']['training_kwargs']['max_steps'] = 100
                platform.test.init_params(params)

                # Pass the callable itself. Writing `trial_execute_graph()` here
                # would run the whole trial synchronously in this thread and
                # hand its return value to Thread as the target.
                trial_thread = threading.Thread(target=CGOExecutionEngine.trial_execute_graph)
                trial_thread.start()
                last_metric = None
                while True:
                    # Sample liveness *before* draining, so a metric emitted just
                    # before the thread exits is still forwarded on the last pass.
                    trial_running = trial_thread.is_alive()
                    if platform.test._last_metric:
                        metric = platform.test.get_last_metric()
                        # Forward each distinct metric once. An unchanged metric
                        # must not skip the exit check below.
                        if metric != last_metric:
                            advisor.handle_report_metric_data(metric)
                            last_metric = metric
                    if not trial_running:
                        break
                    time.sleep(1)
                trial_thread.join()
        finally:
            # Always stop the advisor workers, even when the body raised, so a
            # failing test cannot leave its threads running.
            advisor.stopping = True
            advisor.default_worker.join()
            advisor.assessor_worker.join()
if __name__ == '__main__':
    # Run every TestCase defined in this module. The previously commented-out
    # direct calls were removed: one of them referenced a test method that
    # CGOEngineTest does not define.
    unittest.main()
\ No newline at end of file
import json
import os
import sys
import threading
import unittest
import logging
import time
from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii import Model, Node
from nni.retiarii import Model, submit_models
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii.integration import RetiariiAdvisor
from nni.retiarii.trainer import PyTorchImageClassificationTrainer, PyTorchMultiModelTrainer
from nni.retiarii.utils import import_
def _load_mnist(n_models: int = 1):
    """Load the converted MNIST model IR and optionally replicate it.

    The IR is read from ``converted_mnist_pytorch.json`` in the current
    working directory.

    Returns the loaded ``Model`` itself when ``n_models`` is exactly 1.
    For any other value, returns a list whose first element is the loaded
    model, followed by ``n_models - 1`` forks of it.
    """
    with open('converted_mnist_pytorch.json') as ir_file:
        base_model = Model._load(json.load(ir_file))
    if n_models != 1:
        return [base_model] + [base_model.fork() for _ in range(n_models - 1)]
    return base_model
class DedupInputTest(unittest.TestCase):
    """Tests for the input-deduplication optimizer applied to a logical plan of MNIST models."""

    def _build_logical_with_mnist(self, n_models: int):
        """Build a logical plan containing ``n_models`` MNIST models.

        Returns a ``(logical_plan, models)`` tuple, where ``models`` is always a list.
        """
        lp = LogicalPlan()
        models = _load_mnist(n_models=n_models)
        # _load_mnist returns a bare model (not a list) when n_models == 1.
        if not isinstance(models, list):
            models = [models]
        for m in models:
            lp.add_model(m)
        return lp, models

    def _test_add_model(self):
        """Check that each hidden node of the logical graph matches across all source models.

        Not collected by unittest because of the leading underscore.
        """
        lp, models = self._build_logical_with_mnist(3)
        for node in lp.logical_graph.hidden_nodes:
            old_nodes = [m.root_graph.get_node_by_id(node.id) for m in models]
            # `all`, not `any`: old_nodes[0] is itself in the list, so `any`
            # would pass no matter what the other models contain.
            self.assertTrue(all(old_nodes[0].__repr__() == Node.__repr__(x) for x in old_nodes))

    def test_dedup_input(self):
        """Deduplicate inputs of three MNIST models and compare against the stored dump.

        Also checks that the CGO engine assembles the optimized plan into
        exactly one physical model.
        """
        os.environ['CGO'] = 'true'
        lp, _ = self._build_logical_with_mnist(3)

        opt = DedupInputOptimizer()
        opt.convert(lp)

        with open('dedup_logical_graph.json', 'r') as fp:
            correct_dump = fp.readlines()
        lp_dump = lp.logical_graph._dump()

        # assertEqual (rather than assertTrue(a == b)) reports the differing
        # text when the dumps do not match.
        self.assertEqual(correct_dump[0], json.dumps(lp_dump))

        anything = lambda: None
        advisor = RetiariiAdvisor(anything)
        try:
            cgo = CGOExecutionEngine()

            phy_models = cgo._assemble(lp)
            self.assertEqual(len(phy_models), 1)
            # logging.info(phy_models[0][0]._dump())
            # script=model_to_pytorch_script(phy_models[0][0], placement = phy_models[0][1])
            # logging.info(script)
            # with open('generated/debug_dedup_input.py', 'w') as fp:
            #     fp.write(script)
            # sys.path.insert(0, 'generated')
            # multi_model = import_('debug_dedup_input.logical_0')
            # trainer = PyTorchMultiModelTrainer(
            #     multi_model(), phy_models[0][0].training_config.kwargs
            # )
            # trainer.fit()
        finally:
            # Stop the advisor workers even if an assertion above failed, so a
            # failing test cannot leave its threads running.
            advisor.stopping = True
            advisor.default_worker.join()
            advisor.assessor_worker.join()
if __name__ == '__main__':
    # Run every TestCase defined in this module. The previously commented-out
    # direct calls were removed: they referenced CGOEngineTest, which is not
    # defined in this file.
    unittest.main()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment