record.py 7.26 KB
Newer Older
1
2
3
4
5
6
7
8
9
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""A module for data of monitor."""

import json
import numbers
from datetime import datetime, timezone

10
11
from superbench.benchmarks import ReduceType

12
13
14

class MonitorRecord:
    """Record class to save all monitoring data."""
15
16
17
18
19
20
21
22
    # Reduce operation associated with each GPU metric key, expressed as a
    # ReduceType member (MAX, MIN or LAST).
    # NOTE(review): presumably read by the code that aggregates monitor
    # samples; that consumer is not visible in this file - confirm.
    # NOTE(review): 'gpu_remap' does not match any attribute name of this class
    # exactly (the closest is gpu_remap_info) - presumably it is matched
    # against the serialized keys as a prefix; verify against the consumer.
    reduce_ops = {
        'gpu_temperature': ReduceType.MAX,
        'gpu_power_limit': ReduceType.MIN,
        'gpu_corrected_ecc': ReduceType.LAST,
        'gpu_uncorrected_ecc': ReduceType.LAST,
        'gpu_remap': ReduceType.LAST,
    }

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
    def __init__(self):
        """Constructor."""
        self.__time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        self.__cpu_usage = None
        self.__mem_used = None
        self.__mem_total = None
        self.__gpu_usage = list()
        self.__gpu_temperature = list()
        self.__gpu_power_limit = list()
        self.__gpu_mem_used = list()
        self.__gpu_mem_total = list()
        self.__gpu_corrected_ecc = list()
        self.__gpu_uncorrected_ecc = list()
        self.__gpu_remap_info = list()
        self.__net_receive = dict()
        self.__net_transmit = dict()

    @property
    def time(self):
        """Decoration function to access __time."""
        return self.__time

    @property
    def cpu_usage(self):
        """Decoration function to access __cpu_usage."""
        return self.__cpu_usage

    @cpu_usage.setter
    def cpu_usage(self, usage):
        """Set the cpu usage.

        Args:
            usage (float): cpu usage.
        """
        self.__cpu_usage = usage

    @property
    def mem_used(self):
        """Decoration function to access __mem_used."""
        return self.__mem_used

    @mem_used.setter
    def mem_used(self, mem_used):
        """Set the used host memory, unit: MB.

        Args:
            mem_used (float): used host memory.
        """
        self.__mem_used = mem_used

    @property
    def mem_total(self):
        """Decoration function to access __mem_total."""
        return self.__mem_total

    @mem_total.setter
    def mem_total(self, mem_total):
        """Set the total host memory, unit: MB.

        Args:
            mem_total (float): total host memory.
        """
        self.__mem_total = mem_total

    @property
    def gpu_usage(self):
        """Decoration function to access __gpu_usage."""
        return self.__gpu_usage

    @gpu_usage.setter
    def gpu_usage(self, gpu_usage):
        """Set the gpu usage.

        Args:
            gpu_usage (list): list of gpu usage.
        """
        self.__gpu_usage = gpu_usage

    @property
    def gpu_temperature(self):
        """Decoration function to access __gpu_temperature."""
        return self.__gpu_temperature

    @gpu_temperature.setter
    def gpu_temperature(self, gpu_temperature):
        """Set the gpu temperature, unit: Celsius.

        Args:
            gpu_temperature (list): list of gpu temperature.
        """
        self.__gpu_temperature = gpu_temperature

    @property
    def gpu_power_limit(self):
        """Decoration function to access __gpu_power_limit."""
        return self.__gpu_power_limit

    @gpu_power_limit.setter
    def gpu_power_limit(self, gpu_power_limit):
        """Set the gpu power limit, unit: Watt.

        Args:
            gpu_power_limit (list): list of gpu power limit.
        """
        self.__gpu_power_limit = gpu_power_limit

    @property
    def gpu_mem_used(self):
        """Decoration function to access __gpu_mem_used."""
        return self.__gpu_mem_used

    @gpu_mem_used.setter
    def gpu_mem_used(self, gpu_mem_used):
        """Set the used gpu memory, unit: MB.

        Args:
            gpu_mem_used (list): list of used gpu memory.
        """
        self.__gpu_mem_used = gpu_mem_used

    @property
    def gpu_mem_total(self):
        """Decoration function to access __gpu_mem_total."""
        return self.__gpu_mem_total

    @gpu_mem_total.setter
    def gpu_mem_total(self, gpu_mem_total):
        """Set the total gpu memory, unit: MB.

        Args:
            gpu_mem_total (list): list of total gpu memory.
        """
        self.__gpu_mem_total = gpu_mem_total

    @property
    def gpu_corrected_ecc(self):
        """Decoration function to access __gpu_corrected_ecc."""
        return self.__gpu_corrected_ecc

    @gpu_corrected_ecc.setter
    def gpu_corrected_ecc(self, gpu_corrected_ecc):
        """Set the count of corrected (single bit) ecc error.

        Args:
            gpu_corrected_ecc (list): list of gpu corrected ecc error.
        """
        self.__gpu_corrected_ecc = gpu_corrected_ecc

    @property
    def gpu_uncorrected_ecc(self):
        """Decoration function to access __gpu_uncorrected_ecc."""
        return self.__gpu_uncorrected_ecc

    @gpu_uncorrected_ecc.setter
    def gpu_uncorrected_ecc(self, gpu_uncorrected_ecc):
        """Set the count of uncorrected (double bit) ecc error.

        Args:
            gpu_uncorrected_ecc (list): list of gpu uncorrected ecc error.
        """
        self.__gpu_uncorrected_ecc = gpu_uncorrected_ecc

    @property
    def gpu_remap_info(self):
        """Decoration function to access __gpu_remap_info."""
        return self.__gpu_remap_info

    @gpu_remap_info.setter
    def gpu_remap_info(self, gpu_remap_info):
        """Set the gpu remap_info.

        Args:
            gpu_remap_info (list): list of gpu remap_info.
        """
        self.__gpu_remap_info = gpu_remap_info

    @property
    def net_receive(self):
        """Decoration function to access __net_receive."""
        return self.__net_receive

    @net_receive.setter
    def net_receive(self, net_receive):
        """Set the network receive bandwidth, unit: Bytes/s.

        Args:
            net_receive (dict): receive bandwidth for all devices.
        """
        self.__net_receive = net_receive

    @property
    def net_transmit(self):
        """Decoration function to access __net_transmit."""
        return self.__net_transmit

    @net_transmit.setter
    def net_transmit(self, net_transmit):
        """Set the network transmit bandwidth, unit: Bytes/s.

        Args:
            net_transmit (dict): transmit bandwidth for all devices.
        """
        self.__net_transmit = net_transmit

    def to_string(self):
        """Serialize the MonitorRecord object to string.

        Return:
            The serialized string of MonitorRecord object.
        """
        formatted_obj = dict()
        for key, value in self.__dict__.items():
            # The name of internal member is like '_MonitorRecord__name'.
            # For the result object return to caller, need to reformat the 'name' as key.
            formatted_key = key.split('__')[1]
            if isinstance(value, numbers.Number) or isinstance(value, str):
                formatted_obj[formatted_key] = value
            elif isinstance(value, list):
                for i, item in enumerate(value):
                    if isinstance(item, numbers.Number):
                        formatted_obj['{}:{}'.format(formatted_key, i)] = item
                    elif isinstance(item, dict):
                        for k, v in item.items():
                            formatted_obj['{}:{}'.format(k, i)] = v
            elif isinstance(value, dict):
                for k, v in value.items():
                    formatted_obj[k] = v

        return json.dumps(formatted_obj)