# SuperBench Config
---
superbench:
  # null: fall back to the tool's default enable behavior (run all enabled benchmarks)
  enable: null
  benchmarks:
    # Micro-benchmarks: single-metric GPU/kernel tests, default local execution.
    kernel-launch:
      enable: true
    gemm-flops:
      enable: true
    cudnn-function:
      enable: true
    cublas-function:
      enable: true
    matmul:
      enable: true
      modes:
        # One process per GPU; each process is pinned to its own device
        # via CUDA_VISIBLE_DEVICES and runs sequentially (parallel: false).
        - name: local
          proc_num: 8
          prefix: CUDA_VISIBLE_DEVICES={proc_rank}
          parallel: false  # was `no` — canonical YAML boolean (yamllint truthy)
      frameworks:
        - pytorch
    # Model benchmarks: distributed PyTorch training/inference across all nodes,
    # 8 processes (GPUs) per node. duration: 0 means step-count driven runs.
    gpt_models:
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: all
      frameworks:
        - pytorch
      models:
        - gpt2-small
        - gpt2-large
      parameters:
        duration: 0
        num_warmup: 16
        num_steps: 128
        batch_size: 4
        precision:
          - float32
          - float16
        model_action:
          - train
          - inference
    bert_models:
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: all
      frameworks:
        - pytorch
      models:
        - bert-base
        - bert-large
      parameters:
        duration: 0
        num_warmup: 16
        num_steps: 128
        batch_size: 16
        precision:
          - float32
          - float16
        model_action:
          - train
          - inference
    lstm_models:
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: all
      frameworks:
        - pytorch
      models:
        - lstm
      parameters:
        duration: 0
        num_warmup: 16
        num_steps: 128
        batch_size: 128
        precision:
          - float32
          - float16
        model_action:
          - train
          - inference
    cnn_models:
      enable: true
      modes:
        - name: torch.distributed
          proc_num: 8
          node_num: all
      frameworks:
        - pytorch
      models:
        - resnet50
        - resnet101
        - resnet152
        - densenet169
        - densenet201
        - vgg11
        - vgg13
        - vgg16
        - vgg19
      parameters:
        duration: 0
        num_warmup: 16
        num_steps: 128
        batch_size: 128
        precision:
          - float32
          - float16
        model_action:
          - train
          - inference