Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
f22bb3f2
Unverified
Commit
f22bb3f2
authored
Jun 28, 2021
by
guoshzhao
Committed by
GitHub
Jun 28, 2021
Browse files
Benchmarks: Add Configuration - Add validation config file for azure NDv4. (#103)
* add config file for ndv4.
parent
9c748527
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
108 additions
and
1 deletion
+108
-1
superbench/benchmarks/model_benchmarks/model_base.py
superbench/benchmarks/model_benchmarks/model_base.py
+5
-1
superbench/config/azure_ndv4.yaml
superbench/config/azure_ndv4.yaml
+103
-0
No files found.
superbench/benchmarks/model_benchmarks/model_base.py
View file @
f22bb3f2
...
@@ -164,7 +164,11 @@ def _preprocess(self):
...
@@ -164,7 +164,11 @@ def _preprocess(self):
return
False
return
False
self
.
_judge_gpu_availability
()
self
.
_judge_gpu_availability
()
logger
.
info
(
'GPU availablility - model: {}, availablility: {}.'
.
format
(
self
.
_name
,
self
.
_gpu_available
))
logger
.
info
(
'Model placement - model: {}, GPU availablility: {}, pin memory: {}.'
.
format
(
self
.
_name
,
self
.
_gpu_available
,
self
.
_args
.
pin_memory
)
)
if
not
self
.
_init_distributed_setting
():
if
not
self
.
_init_distributed_setting
():
self
.
_result
.
set_return_code
(
ReturnCode
.
DISTRIBUTED_SETTING_INIT_FAILURE
)
self
.
_result
.
set_return_code
(
ReturnCode
.
DISTRIBUTED_SETTING_INIT_FAILURE
)
...
...
superbench/config/azure_ndv4.yaml
0 → 100644
View file @
f22bb3f2
# SuperBench Config
superbench
:
enable
:
null
var
:
default_local_mode
:
&default_local_mode
enable
:
true
modes
:
-
name
:
local
proc_num
:
8
prefix
:
CUDA_VISIBLE_DEVICES={proc_rank}
parallel
:
yes
default_pytorch_mode
:
&default_pytorch_mode
enable
:
true
modes
:
-
name
:
torch.distributed
proc_num
:
8
node_num
:
1
frameworks
:
-
pytorch
common_model_config
:
&common_model_config
duration
:
0
num_warmup
:
64
num_steps
:
2048
sample_count
:
8192
batch_size
:
32
precision
:
-
float32
-
float16
model_action
:
-
train
pin_memory
:
yes
benchmarks
:
kernel-launch
:
<<
:
*default_local_mode
gemm-flops
:
<<
:
*default_local_mode
cudnn-function
:
<<
:
*default_local_mode
cublas-function
:
<<
:
*default_local_mode
matmul
:
<<
:
*default_local_mode
frameworks
:
-
pytorch
sharding-matmul
:
<<
:
*default_pytorch_mode
computation-communication-overlap
:
<<
:
*default_pytorch_mode
gpt_models
:
<<
:
*default_pytorch_mode
models
:
-
gpt2-large
parameters
:
<<
:
*common_model_config
batch_size
:
8
seq_len
:
224
bert_models
:
<<
:
*default_pytorch_mode
models
:
-
bert-base
-
bert-large
parameters
:
<<
:
*common_model_config
seq_len
:
224
lstm_models
:
<<
:
*default_pytorch_mode
models
:
-
lstm
parameters
:
<<
:
*common_model_config
batch_size
:
224
input_size
:
224
hidden_size
:
1000
seq_len
:
32
pin_memory
:
no
resnet_models
:
<<
:
*default_pytorch_mode
models
:
-
resnet50
-
resnet101
-
resnet152
parameters
:
<<
:
*common_model_config
batch_size
:
192
num_steps
:
512
densenet_models
:
<<
:
*default_pytorch_mode
models
:
-
densenet169
-
densenet201
parameters
:
<<
:
*common_model_config
pin_memory
:
no
vgg_models
:
<<
:
*default_pytorch_mode
models
:
-
vgg11
-
vgg13
-
vgg16
-
vgg19
parameters
:
<<
:
*common_model_config
pin_memory
:
no
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment