dist_train_kenel.sh 930 Bytes
Newer Older
lishj6's avatar
init  
lishj6 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env bash
#
# Launch distributed training with MIOpen logging enabled, save the complete
# console output to a log file, then summarise the MIOpenDriver command lines
# found in that log.
#
# Usage:
#   [PORT=28509] [MODEL_NAME=tag] dist_train_kenel.sh CONFIG GPUS [TRAIN_ARGS...]
#
# Arguments:
#   CONFIG      config file handed to train.py
#   GPUS        value passed to torchrun --nproc_per_node
#   TRAIN_ARGS  any further arguments, forwarded to train.py unchanged
#
# Environment:
#   PORT        torchrun --master_port value (default: 28509)
#   MODEL_NAME  optional tag; when set, "_<MODEL_NAME>" is inserted before
#               ".log" in every output file name below
#
# Outputs (written to the current directory):
#   get_miopen_conv.log  raw training output (stdout and stderr)
#   miopen_conv.log      unique "./bin/MIOpenDriver" lines with occurrence counts
#   miopen_bn.log        unique "./bin/MIOpenDriver bnorm" lines with counts
#
# Exit status: that of torchrun (2 on a usage error).

if (( $# < 2 )); then
  printf 'Usage: %s CONFIG GPUS [TRAIN_ARGS...]\n' "${0##*/}" >&2
  exit 2
fi

export MIOPEN_ENABLE_LOGGING=1      # enable MIOpen logging (default: 0)
export MIOPEN_ENABLE_LOGGING_CMD=1  # also log command info (default: 0)
export MIOPEN_LOG_LEVEL=6           # log print level (default: 0)

CONFIG=$1
GPUS=$2
PORT=${PORT:-28509}

# Master node address (may be omitted for single-machine runs).
export MASTER_ADDR="localhost"

script_dir=$(dirname -- "$0")

# Empty unless MODEL_NAME is set, so the default raw log keeps the name
# get_miopen_conv.log.
log_suffix=${MODEL_NAME:+_${MODEL_NAME}}
raw_log="get_miopen_conv${log_suffix}.log"
conv_log="miopen_conv${log_suffix}.log"
bn_log="miopen_bn${log_suffix}.log"

# Capture the run with tee rather than script(1): script would open an
# interactive shell and block here, and the training output would never
# reach the log. stderr is merged into stdout so the log holds everything
# a terminal would have shown.
PYTHONPATH="${script_dir}/..:${PYTHONPATH:-}" \
  torchrun --nproc_per_node="$GPUS" --master_port="$PORT" \
  "${script_dir}/train.py" "$CONFIG" --launcher pytorch "${@:3}" --deterministic \
  2>&1 | tee -- "$raw_log"
# Status of torchrun itself, not of tee.
train_status=${PIPESTATUS[0]}

# Summarise all MIOpenDriver command lines (conv log). -F matches the
# pattern literally, so "." is not treated as a regex wildcard.
grep -F -- "./bin/MIOpenDriver" "$raw_log" | sort | uniq -c > "$conv_log"

# Same summary restricted to batch-norm (bnorm) commands.
grep -F -- "./bin/MIOpenDriver bnorm" "$raw_log" | sort | uniq -c > "$bn_log"

exit "$train_status"