benchmark_trainv2.sh 6.95 KB
Newer Older
LDOUBLEV's avatar
LDOUBLEV committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/bin/bash
source test_tipc/common_func.sh

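# Run the training benchmark for one model configuration.
# Usage:
# bash <path/to/this/script> <config.txt> <mode> <params>
#   <mode>   is expected to be benchmark_train
#   <params> looks like dynamic_bs8_null_SingleP_DP_N1C1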

#######################################
# Print the second '='-separated field of a "key=value" string.
# Only the text between the first and the second '=' is printed, so
# "a=b=c" yields "b". A string without '=' yields an empty line.
# The value is extracted with parameter expansion, so the caller's IFS is
# left untouched and glob characters in the value are printed literally.
# Arguments:
#   $1 - string of the form key=value
# Outputs:
#   the extracted value followed by a newline, on stdout
#######################################
function func_parser_params(){
    local strs=$1
    local tmp=""
    if [[ ${strs} == *=* ]]; then
        # Drop everything up to and including the first '=' ...
        tmp=${strs#*=}
        # ... then everything from the next '=' onwards.
        tmp=${tmp%%=*}
    fi
    printf '%s\n' "${tmp}"
}

#######################################
# Rewrite the value of one "key:value" line of a config file in place.
# The key (text before the first ':') of the addressed line is kept. When
# the old value contains "benchmark_train", the text before its first '='
# is kept as a prefix, so the line becomes "key:<prefix>=<new value>";
# otherwise the line becomes "key:<new value>".
# Arguments:
#   $1 - path of the file to edit (modified with sed -i)
#   $2 - 1-based number of the line to rewrite
#   $3 - new value
#######################################
function func_sed_params(){
    local filename=$1
    local line=$2
    local param_value=$3
    local params key value new_params escaped
    local -a array=()

    params=$(sed -n "${line}p" "$filename")
    # IFS is changed for this read only, so the caller's IFS (and its own
    # "params" variable) are no longer clobbered by this function.
    IFS=":" read -r -a array <<< "$params"
    key=${array[0]}
    value=${array[1]}
    if [[ $value == *benchmark_train* ]];then
        # Keep whatever precedes the first '=' of the old value.
        param_value="${value%%=*}=${param_value}"
    fi
    new_params="${key}:${param_value}"
    # Escape '\', '/' and '&', which are special in a sed replacement, so
    # values such as paths are written literally instead of breaking sed.
    escaped=$(printf '%s' "$new_params" | sed -e 's|[\\/&]|\\&|g')
    # Call sed directly: no eval, so quotes in the value cannot break out.
    sed -i "${line}s/.*/${escaped}/" "$filename"
}

#######################################
# Turn a device spec such as "N1C8" into a comma separated id list.
# The first character is skipped; the number before 'C' (M) and the number
# after it (P) are read, and the ids 0..((P - 1) / M) are printed, e.g.
# "N1C8" -> "0,1,2,3,4,5,6,7" and "N4C32" -> "0,1,2,3,4,5,6,7".
# Specs of any length are accepted (no fixed-width substring), and the
# caller's IFS and variables are left untouched.
# Arguments:
#   $1 - device spec: one leading character, digits, 'C', digits
# Outputs:
#   the id list on stdout; nothing on stdout if the spec cannot be parsed
# Returns:
#   0 on success, 1 if the spec is malformed or M is 0
#######################################
function set_gpu_id(){
    local string=$1
    local pattern='^.([0-9]+)C([0-9]+)$'
    local m_value p_value last_id

    if [[ ! ${string} =~ ${pattern} ]]; then
        printf 'set_gpu_id: cannot parse device spec "%s"\n' "${string}" >&2
        return 1
    fi
    # 10# forces base 10 so that a leading zero is not read as octal.
    m_value=$(( 10#${BASH_REMATCH[1]} ))
    p_value=$(( 10#${BASH_REMATCH[2]} ))
    if (( m_value == 0 )); then
        printf 'set_gpu_id: divisor is 0 in device spec "%s"\n' "${string}" >&2
        return 1
    fi
    last_id=$(( (p_value - 1) / m_value ))
    seq -s "," 0 "${last_id}"
}

#######################################
# Print the last path component of the current working directory.
# The name is taken with parameter expansion, so the caller's IFS is not
# modified and a directory name containing glob characters or spaces is
# printed literally. For "/" an empty line is printed.
# Outputs:
#   the directory name followed by a newline, on stdout
#######################################
function get_repo_name(){
    local cur_dir
    cur_dir=$(pwd)
    printf '%s\n' "${cur_dir##*/}"
}

# Positional arguments: config file, mode, and the encoded run parameters.
FILENAME=$1
# MODE be one of ['benchmark_train']
MODE=$2
params=$3
# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2.0/train_benchmark.txt  benchmark_train dynamic_bs8_null_SingleP_DP_N1C1

# parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_process_type}_${run_mode}_${device_num}
# IFS is switched several times below; each unquoted expansion relies on the
# IFS value that is in effect at that point, so keep the statement order.
IFS="_"
params_list=(${params})
model_type=${params_list[0]}
batch_size=${params_list[1]}
# keep only the numeric part of e.g. "bs8"
batch_size=`echo  ${batch_size} | tr -cd "[0-9]" `
precision=${params_list[2]}
run_process_type=${params_list[3]}
run_mode=${params_list[4]}
device_num=${params_list[5]}
device_num_copy=$device_num
IFS=";"


# sed batchsize and precision
# (config line 6 receives the precision, line 9 the batch size)
func_sed_params "$FILENAME" "6" "$precision"
func_sed_params "$FILENAME" "9" "$batch_size"

# parser params from train_benchmark.txt
dataline=`cat $FILENAME`
# parser params
# Split the file content into one array entry per non-empty line.
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")

# get the line number of the line that contains benchmark_params
line_num=`grep -n "benchmark_params" $FILENAME  | cut -d ":" -f 1`
# for train log parser
# NOTE(review): grep -n counts from 1 while the lines array is indexed from 0
# and has blank lines dropped by word splitting, so lines[line_num] is not the
# physical line "line_num" of the file - confirm against the config layout.
line_num=`expr $line_num + 1`

profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"

line_num=`expr $line_num + 1`
flags_value=$(func_parser_value "${lines[line_num]}")

gpu_id=$(set_gpu_id $device_num)
repo_name=$(get_repo_name )

# Logs go below $BENCHMARK_LOG_DIR, or below the current directory if unset.
SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}   # */benchmark_log
status_log="${SAVE_LOG}/benchmark_log/results.log"

# set export
# flags_value is a ';' separated list; every entry is exported as-is.
IFS=";"
flags_list=(${flags_value})
for _flag in ${flags_list[*]}; do
    cmd="export ${_flag}"
    eval $cmd
done

# "null" in the precision field is reported as fp32 from here on.
# The operand is quoted so that an empty precision is a plain "not equal"
# instead of a "[: =: unary operator expected" error.
if [ "${precision}" = "null" ];then
    precision="fp32"
fi

# set env
python=python
# Export the model repository branch/commit and the installed framework
# version/commit (presumably consumed by the log analysis step - confirm).
# "-f 3-" keeps everything after refs/heads/, so a branch name that itself
# contains '/' (e.g. feature/foo) is not cut down to its first component.
export model_branch=$(git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3-)
# first line of "git log" is "commit <sha>"; the sha is the second field
export model_commit=$(git log|head -n1|awk '{print $2}')
export str_tmp=$(pip list|grep paddlepaddle-gpu|awk '{print $2}')
# strip a ".post*" suffix from the pip version string
export frame_version=${str_tmp%%.post*}
export frame_commit=$(${python} -c "import paddle;print(paddle.version.commit)")

# set eval and export as null
# line eval_py: 24
# line export_py: 30
func_sed_params "$FILENAME" "24" "null"
func_sed_params "$FILENAME" "30" "null"
# config line 3 is set to the python executable name
func_sed_params "$FILENAME" "3"  "python"


# Run the benchmark.
#   * gpu_id of at most one character (a single card): first do one run with
#     the profiler option written to config line 13, logged under
#     $SAVE_LOG/profiling_log, then do the timed run.
#   * otherwise: write the gpu id list to config line 4 and do the timed run.
# The timed run is logged under $SAVE_LOG/train_log and that log is then
# handed to ${BENCHMARK_ROOT}/scripts/analysis.py.
if [ ${#gpu_id} -le 1 ];then
    log_path="$SAVE_LOG/profiling_log"
    mkdir -p "$log_path"
    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
    func_sed_params "$FILENAME" "4" "0"  # sed used gpu_id
    # set profile_option params
    # NOTE(review): $cmd is expanded unquoted while IFS is ';', so every ';'
    # inside profile_option reaches sed as a space. Left exactly as it was
    # because the downstream script may depend on it - confirm the intent.
    IFS=";"
    cmd="sed -i '13s/.*/${profile_option}/' '${FILENAME}'"
    eval $cmd

    # run test_train_inference_python.sh
    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
    echo "$cmd"
    eval "$cmd"
    cat "${log_path}/${log_name}"
else
    # Must run in this shell: inside a command substitution the unset would
    # only affect a subshell and leave the variable set for the training run.
    unset CUDA_VISIBLE_DEVICES
    func_sed_params "$FILENAME" "4" "$gpu_id"  # sed used gpu_id
fi

# without profile
log_path="$SAVE_LOG/train_log"
speed_log_path="$SAVE_LOG/index"
mkdir -p "$log_path"
mkdir -p "$speed_log_path"
log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
func_sed_params "$FILENAME" "13" "null"  # sed profile option as null
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
echo "$cmd"
# Take the start/end times as epoch seconds so that their difference is the
# elapsed time in seconds; subtracting %Y%m%d%H%M%S stamps is not (crossing
# a minute boundary alone would add 40 to the result).
job_bt=$(date '+%s')
eval "$cmd"
job_et=$(date '+%s')
export model_run_time=$((job_et - job_bt))
cat "${log_path}/${log_name}"

# parser log
_model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
            --speed_log_file '${speed_log_path}/${speed_log_name}' \
            --model_name ${_model_name} \
            --base_batch_size ${batch_size} \
            --run_mode ${run_mode} \
            --run_process_type ${run_process_type} \
            --fp_item ${precision} \
            --keyword samples/s: \
            --skip_steps 2 \
            --device_num ${device_num} \
            --speed_unit images/s \
            --convergence_key loss: "
echo "$cmd"
eval "$cmd"
# exit status of the analysis command, recorded in the status log
last_status=${PIPESTATUS[0]}
status_check $last_status "${cmd}" "${status_log}"