Unverified Commit 9388f8f5 authored by guoshzhao's avatar guoshzhao Committed by GitHub
Browse files

add more checks for model base (#12)


Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>
parent abc6c991
...@@ -39,10 +39,12 @@ def __init__(self, name, parameters=''): ...@@ -39,10 +39,12 @@ def __init__(self, name, parameters=''):
super().__init__(name, parameters) super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.MODEL self._benchmark_type = BenchmarkType.MODEL
self._world_size = None self._world_size = 1
self._local_rank = None
self._dataset = None self._dataset = None
self._dataloader = None self._dataloader = None
self._model = None self._model = None
self._optimizer_type = None
self._optimizer = None self._optimizer = None
self._loss_fn = None self._loss_fn = None
self._target = None self._target = None
...@@ -107,17 +109,29 @@ def add_parser_arguments(self): ...@@ -107,17 +109,29 @@ def add_parser_arguments(self):
@abstractmethod @abstractmethod
def _init_distributed_setting(self): def _init_distributed_setting(self):
"""Initialize the distributed library and bind the worker to GPU.""" """Initialize the distributed library and bind the worker to GPU.
Return:
True if distributed library is initialized successfully.
"""
pass pass
@abstractmethod @abstractmethod
def _generate_dataset(self): def _generate_dataset(self):
"""Generate dataset for benchmarking according to shape info.""" """Generate dataset for benchmarking according to shape info.
Return:
True if dataset is created successfully.
"""
pass pass
@abstractmethod @abstractmethod
def _init_dataloader(self): def _init_dataloader(self):
"""Initialize the distributed dataloader.""" """Initialize the dataloader.
Return:
True if dataloader is created successfully.
"""
pass pass
def _preprocess(self): def _preprocess(self):
...@@ -126,18 +140,30 @@ def _preprocess(self): ...@@ -126,18 +140,30 @@ def _preprocess(self):
Return: Return:
True if _preprocess() succeed. True if _preprocess() succeed.
""" """
ret = super()._preprocess() if not super()._preprocess():
if not ret: return False
if not self._init_distributed_setting():
self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_INIT_FAILURE)
return False
if not self._generate_dataset():
self._result.set_return_code(ReturnCode.DATASET_GENERATION_FAILURE)
return False
if not self._init_dataloader():
self._result.set_return_code(ReturnCode.DATALOADER_INIT_FAILURE)
return False return False
self._init_distributed_setting()
self._generate_dataset()
self._init_dataloader()
return True return True
@abstractmethod @abstractmethod
def _create_optimizer(self): def _create_optimizer(self):
"""Create the optimzier instance used for training.""" """Create the optimzier instance used for training and wrap with distributed library if need.
Return:
True if optimizer instance is created successfully.
"""
pass pass
@abstractmethod @abstractmethod
...@@ -158,8 +184,14 @@ def __train(self, precision): ...@@ -158,8 +184,14 @@ def __train(self, precision):
Return: Return:
True if step_times list is not empty. True if step_times list is not empty.
""" """
self._create_model(precision) if not self._create_model(precision):
self._create_optimizer() self._result.set_return_code(ReturnCode.MODEL_CREATION_FAILURE)
return False
if not self._create_optimizer():
self._result.set_return_code(ReturnCode.OPTIMIZER_CREATION_FAILURE)
return False
# The unit of step time should be millisecond. # The unit of step time should be millisecond.
step_times = self._train_step(precision) step_times = self._train_step(precision)
if len(step_times) == 0: if len(step_times) == 0:
......
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT license. # Licensed under the MIT license.
"""A module for unified context of benchmarks.""" """A module for statuses of benchmarks."""
import enum from superbench.benchmarks.context import Enum
class Enum(enum.Enum):
"""Customized Enum class."""
@classmethod
def get_values(cls):
"""Return the value list."""
values = [item.value for item in cls]
return values
class ReturnCode(Enum): class ReturnCode(Enum):
...@@ -26,3 +17,8 @@ class ReturnCode(Enum): ...@@ -26,3 +17,8 @@ class ReturnCode(Enum):
NO_SUPPORTED_PRECISION = 10 NO_SUPPORTED_PRECISION = 10
MODEL_TRAIN_FAILURE = 11 MODEL_TRAIN_FAILURE = 11
MODEL_INFERENCE_FAILURE = 12 MODEL_INFERENCE_FAILURE = 12
DISTRIBUTED_SETTING_INIT_FAILURE = 13
DATASET_GENERATION_FAILURE = 14
DATALOADER_INIT_FAILURE = 15
OPTIMIZER_CREATION_FAILURE = 16
MODEL_CREATION_FAILURE = 17
...@@ -41,23 +41,23 @@ def add_parser_arguments(self): ...@@ -41,23 +41,23 @@ def add_parser_arguments(self):
def _init_distributed_setting(self): def _init_distributed_setting(self):
"""Initialize the distributed library and bind the worker to GPU.""" """Initialize the distributed library and bind the worker to GPU."""
pass return True
def _generate_dataset(self): def _generate_dataset(self):
"""Generate dataset for benchmarking according to shape info.""" """Generate dataset for benchmarking according to shape info."""
pass return True
def _init_dataloader(self): def _init_dataloader(self):
"""Initialize the distributed dataloader.""" """Initialize the distributed dataloader."""
pass return True
def _create_optimizer(self): def _create_optimizer(self):
"""Create the optimzier instance used for training.""" """Create the optimzier instance used for training."""
pass return True
def _create_model(self, precision): def _create_model(self, precision):
"""Construct the model for benchmarking.""" """Construct the model for benchmarking."""
pass return True
def _train_step(self, precision): def _train_step(self, precision):
"""Define the training process. """Define the training process.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment