test_cgo_engine.py 2.63 KB
Newer Older
1
2
3
4
5
6
7
import json
import os
import sys
import threading
import unittest
import logging
import time
QuanluZhang's avatar
QuanluZhang committed
8
9
10
import torch

from pathlib import Path
11
12
13
14
15
16
17
18
19
20

from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii import Model, Node

from nni.retiarii import Model, submit_models
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii.integration import RetiariiAdvisor
21
from nni.retiarii.evaluator.pytorch import PyTorchImageClassificationTrainer, PyTorchMultiModelTrainer
22
23
24
25
from nni.retiarii.utils import import_


def _load_mnist(n_models: int = 1):
    """Load the converted MNIST model stored next to this test file.

    The graph is read from ``converted_mnist_pytorch.json`` in the directory
    of this module and turned into a ``Model`` via ``Model._load``.

    Parameters
    ----------
    n_models : int
        How many models the caller wants.

    Returns
    -------
    Model or list of Model
        The loaded model itself when ``n_models == 1``; otherwise a list whose
        first element is the loaded model, followed by ``n_models - 1`` forks
        of it (so any value below 1 still yields a one-element list).
    """
    path = Path(__file__).parent / 'converted_mnist_pytorch.json'
    # Be explicit about the encoding so the load does not depend on the
    # platform's default locale.
    with open(path, encoding='utf-8') as f:
        mnist_model = Model._load(json.load(f))

    if n_models == 1:
        return mnist_model

    # The loaded model comes first; the remaining entries are forks of it.
    return [mnist_model] + [mnist_model.fork() for _ in range(n_models - 1)]
36
37
38


@unittest.skip('Skipped in this version')
class CGOEngineTest(unittest.TestCase):
    """Submission test for two MNIST models with the ``CGO`` flag set.

    The whole case is currently disabled through ``unittest.skip``.
    """

    def test_submit_models(self):
        """Submit two MNIST models and, with at least two GPUs, run the trial.

        The protocol's output and input handles are pointed at the same file
        under ``generated/`` so that what is written by the submission can be
        read back with ``protocol.receive()``. The trial itself is only
        executed when CUDA is available with two or more devices; in every
        case the advisor's worker threads are stopped and joined at the end.
        """
        os.environ['CGO'] = 'true'
        os.makedirs('generated', exist_ok=True)
        from nni.runtime import protocol
        import nni.runtime.platform.test as tt

        # Writer and reader share one file; close both once the test is over
        # so the handles are not leaked.
        protocol._out_file = open('generated/debug_protocol_out_file.py', 'wb')
        self.addCleanup(protocol._out_file.close)
        protocol._in_file = open('generated/debug_protocol_out_file.py', 'rb')
        self.addCleanup(protocol._in_file.close)

        models = _load_mnist(2)
        advisor = RetiariiAdvisor()
        try:
            submit_models(*models)

            if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
                _, data = protocol.receive()
                params = json.loads(data)
                # Cap the training length so the test stays short.
                params['parameters']['training_kwargs']['max_steps'] = 100

                tt.init_params(params)

                # Pass the callable itself: calling it here would run the
                # whole trial synchronously and hand its return value to
                # Thread as the target.
                trial_thread = threading.Thread(
                    target=CGOExecutionEngine.trial_execute_graph)
                trial_thread.start()
                last_metric = None
                while True:
                    time.sleep(1)
                    # Sample liveness before reading the metric so a metric
                    # published right before the thread exits is still
                    # forwarded on this final pass.
                    trial_finished = not trial_thread.is_alive()
                    if tt._last_metric:
                        metric = tt.get_last_metric()
                        # Forward only new metrics; an unchanged metric must
                        # not skip the exit check below, or the loop would
                        # spin forever after the trial ends.
                        if metric != last_metric:
                            advisor.handle_report_metric_data(metric)
                            last_metric = metric
                    if trial_finished:
                        break

                trial_thread.join()
        finally:
            # Same shutdown sequence as on success, but also executed when the
            # test body raises, so the worker threads are not left running.
            advisor.stopping = True
            advisor.default_worker.join()
            advisor.assessor_worker.join()


if __name__ == '__main__':
    # Run the test cases in this module when it is executed as a script.
    unittest.main()