benchmark_train.sh 9.13 KB
Newer Older
1
2
3
#!/bin/bash
source test_tipc/common_func.sh

LDOUBLEV's avatar
LDOUBLEV committed
4
5
6
7
8
9
10
11
# set env
# Interpreter used for every python invocation made by this script.
python=python
# Branch name: every field after "refs/heads/", so branch names containing "/"
# are kept whole. stderr is discarded, so the value is empty when git cannot
# resolve a symbolic ref.
export model_branch=$(git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3-)
# First line of `git log` is "commit <sha>"; keep the sha.
export model_commit=$(git log | head -n1 | awk '{print $2}')
# Installed paddlepaddle-gpu version as reported by pip (second column).
export str_tmp=$(pip list | grep paddlepaddle-gpu | awk '{print $2}')
# Drop the ".post*" suffix from the version string.
export frame_version=${str_tmp%%.post*}
export frame_commit=$(${python} -c "import paddle;print(paddle.version.commit)")

12
13
# run benchmark sh
# Usage:
#   bash test_tipc/benchmark_train.sh config.txt benchmark_train params
# or
#   bash test_tipc/benchmark_train.sh config.txt benchmark_train
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

#######################################
# Print the value part of a "key=value" string.
# Arguments:
#   $1 - string of the form key=value
# Outputs:
#   The second "="-separated field of $1 on stdout (empty line if absent).
#######################################
function func_parser_params(){
    local strs=$1
    local -a array
    # IFS is changed only for this read, so the caller's IFS is untouched;
    # read also performs no glob expansion on the fields.
    IFS="=" read -r -a array <<< "${strs}"
    printf '%s\n' "${array[1]}"
}

#######################################
# Replace the value of a "key:value" entry on one line of a config file.
# The key (text before the first ":") is kept and everything after it is
# replaced by the new value.
# Arguments:
#   $1 - path of the config file (edited in place with sed -i)
#   $2 - 1-based number of the line to rewrite
#   $3 - new value
# Globals:
#   IFS - left set to ";" on return, which is the state callers of the
#         previous implementation observed.
# Returns:
#   Exit status of the in-place sed.
#######################################
function func_sed_params(){
    local filename=$1
    local line=$2
    local param_value=$3
    local params key new_params escaped

    params=$(sed -n "${line}p" "${filename}")
    key=${params%%:*}
    new_params="${key}:${param_value}"
    # "\", "/" and "&" are special in a sed replacement; escape them so values
    # such as paths are written literally.
    escaped=$(printf '%s' "${new_params}" | sed -e 's|[\\/&]|\\&|g')
    IFS=";"
    sed -i "${line}s/.*/${escaped}/" "${filename}"
}

#######################################
# Convert a device spec such as "N1C4" into a comma-separated GPU id list.
# The first character is skipped and the following (at most six) characters
# are read as "<M>C<P>"; the ids 0..((P-1)/M) are printed (integer division).
# Arguments:
#   $1 - device spec, e.g. N1C4
# Outputs:
#   Id list on stdout, e.g. "0,1,2,3".
# Returns:
#   Non-zero, with a message on stderr and nothing on stdout, when the spec
#   is malformed.
#######################################
function set_gpu_id(){
    local string=$1
    local _str=${string:1:6}
    local M P gpu_num

    if ! [[ "${_str}" =~ ^([0-9]+)C([0-9]+)$ ]]; then
        echo "set_gpu_id: invalid device spec '${string}' (expected e.g. N1C4)" >&2
        return 1
    fi
    # 10# forces base 10 so a leading zero is not parsed as octal.
    M=$((10#${BASH_REMATCH[1]}))
    P=$((10#${BASH_REMATCH[2]}))
    if (( M == 0 )); then
        echo "set_gpu_id: invalid device spec '${string}' (divisor is 0)" >&2
        return 1
    fi

    gpu_num=$(( (P - 1) / M ))
    seq -s "," 0 "${gpu_num}"
}

LDOUBLEV's avatar
LDOUBLEV committed
55
56
57
58
59
60
61
#######################################
# Print the name of the current working directory (its last path component).
# Outputs:
#   Directory name on stdout (empty line when the cwd is "/").
#######################################
function get_repo_name(){
    local cur_dir
    cur_dir=$(pwd)
    # Strip everything up to the last "/" rather than word-splitting on IFS:
    # splitting overwrote the caller's IFS and glob-expanded path components.
    printf '%s\n' "${cur_dir##*/}"
}
62

LDOUBLEV's avatar
LDOUBLEV committed
63
FILENAME=$1
if [ -z "${FILENAME}" ] || [ ! -f "${FILENAME}" ]; then
    echo "Usage: bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]" >&2
    exit 1
fi
# copy FILENAME as new
# All later edits are made on this working copy, not on the caller's file.
new_filename="./test_tipc/benchmark_train.txt"
# Skip the copy when the caller already passed the working copy itself
# (cp refuses to copy a file onto itself).
if [ ! "${FILENAME}" -ef "${new_filename}" ]; then
    # Stop here on failure; otherwise a stale copy from an earlier run would
    # be benchmarked silently.
    cp -f -- "${FILENAME}" "${new_filename}" || exit 1
fi
FILENAME=$new_filename
# MODE must be one of ['benchmark_train']
MODE=$2
# Optional run selector; see the example below for its format.
PARAMS=$3
# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt  benchmark_train dynamic_bs8_null_DP_N1C1
IFS=$'\n'
LDOUBLEV's avatar
LDOUBLEV committed
73
74
75
76
77
78
79
80
# parser params from train_benchmark.txt
# mapfile stores one array element per file line (blank lines kept, no glob
# expansion), so array indices stay aligned with the line numbers that
# grep -n reports below.
mapfile -t lines < "${FILENAME}"
# parser params
IFS=$'\n'
model_name=$(func_parser_value "${lines[1]}")

# Get the number of the line that contains train_benchmark_params
line_num=$(grep -n -m 1 "train_benchmark_params" "${FILENAME}" | cut -d ":" -f 1)
if [ -z "${line_num}" ]; then
    echo "train_benchmark_params not found in ${FILENAME}" >&2
    exit 1
fi
# for train log parser
# grep -n is 1-based and the array is 0-based, so lines[line_num] is the
# entry directly after the matched line.
batch_size=$(func_parser_value "${lines[line_num]}")
line_num=$((line_num + 1))
fp_items=$(func_parser_value "${lines[line_num]}")
line_num=$((line_num + 1))
epoch=$(func_parser_value "${lines[line_num]}")

line_num=$((line_num + 1))
profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"

line_num=$((line_num + 1))
flags_value=$(func_parser_value "${lines[line_num]}")
LDOUBLEV's avatar
LDOUBLEV committed
96
# set flags
LDOUBLEV's avatar
LDOUBLEV committed
97
98
99
100
101
102
103
# Export every ";"-separated entry of flags_value into the environment.
IFS=";"
flags_list=(${flags_value})
for _flag in "${flags_list[@]}"; do
    # Empty entries (e.g. from ";;") are skipped, as unquoted expansion did.
    [ -n "${_flag}" ] || continue
    # NOTE(review): eval runs text taken from the config file; only use
    # trusted configs.
    cmd="export ${_flag}"
    eval $cmd
done

LDOUBLEV's avatar
LDOUBLEV committed
104
105
106
107
108
# set log_name
# Logs go under BENCHMARK_LOG_DIR when it is set and non-empty, otherwise
# under the current directory.
repo_name="$(get_repo_name)"
if [ -n "${BENCHMARK_LOG_DIR}" ]; then
    SAVE_LOG="${BENCHMARK_LOG_DIR}"   # */benchmark_log
else
    SAVE_LOG="$(pwd)"
fi
mkdir -p "${SAVE_LOG}/benchmark_log/"
# File handed to status_check after each analysis command.
status_log="${SAVE_LOG}/benchmark_log/results.log"
LDOUBLEV's avatar
set env  
LDOUBLEV committed
109

LDOUBLEV's avatar
LDOUBLEV committed
110
111
112
113
114
115
116
117
118
# Line numbers (1-based, in the copied config file) of the entries that this
# script rewrites through func_sed_params.

# Rewritten once before the sweep starts.
line_eval_py=24      # set to null
line_export_py=30    # set to null
line_python=3        # set to $python

# Rewritten for every batch size / precision / device combination.
line_precision=6
line_batchsize=9
line_epoch=7
line_gpuid=4
line_profile=13      # profile option, or null for the timed run
LDOUBLEV's avatar
set env  
LDOUBLEV committed
119

LDOUBLEV's avatar
LDOUBLEV committed
120
121
122
# Null out the eval and export entries, then point the config at ${python}.
for _null_line in "${line_eval_py}" "${line_export_py}"; do
    func_sed_params "$FILENAME" "${_null_line}" "null"
done
func_sed_params "$FILENAME" "${line_python}" "$python"
LDOUBLEV's avatar
LDOUBLEV committed
123

LDOUBLEV's avatar
LDOUBLEV committed
124
125
126
127
128
129
130
131
# if params
if [ -z "$PARAMS" ]; then
    # PARAMS input is not a word.
    # Sweep every "|"-separated batch size and precision read from the config,
    # on the default device spec.
    IFS="|"
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
else
    # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    params_list=(${PARAMS})
    if [ "${#params_list[@]}" -lt 5 ]; then
        echo "invalid params '${PARAMS}': expected modeltype_bs<N>_<fp_item>_<run_mode>_<device_num>" >&2
        exit 1
    fi
    model_type=${params_list[0]}
    # Keep only the digits of the batch-size field, e.g. "bs8" -> "8".
    batch_size=${params_list[1]//[^0-9]/}
    precision=${params_list[2]}
    # run_process_type=${params_list[3]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    IFS=";"

    # Quoted so an empty precision does not break the test expression.
    if [ "${precision}" = "null" ]; then
        precision="fp32"
    fi

    fp_items_list=($precision)
    batch_size_list=($batch_size)
    device_num_list=($device_num)
fi
153

LDOUBLEV's avatar
LDOUBLEV committed
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# Main sweep: for every batch size / precision / device spec, rewrite the
# config copy, run test_tipc/test_train_inference_python.sh and feed the
# training log to ${BENCHMARK_ROOT}/scripts/analysis.py.

# profile_option is inserted as a sed replacement below; escape "\", "/" and
# "&" so it is written literally.
profile_option_sed=$(printf '%s' "${profile_option}" | sed -e 's|[\\/&]|\\&|g')

IFS="|"
for batch_size in "${batch_size_list[@]}"; do
    for precision in "${fp_items_list[@]}"; do
        for device_num in "${device_num_list[@]}"; do
            # sed batchsize and precision
            func_sed_params "$FILENAME" "${line_precision}" "$precision"
            func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
            func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
            gpu_id=$(set_gpu_id "$device_num")

            # A single id ("0") or an empty result selects the single-process path.
            if [ ${#gpu_id} -le 1 ];then
                run_process_type="SingleP"
                log_path="$SAVE_LOG/profiling_log"
                mkdir -p "$log_path"
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id 
                # set profile_option params
                sed -i "${line_profile}s/.*/${profile_option_sed}/" "${FILENAME}"

                # run test_train_inference_python.sh
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo "$cmd"
                eval "$cmd"
                eval "cat ${log_path}/${log_name}"

                # without profile
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p "$log_path"
                mkdir -p "$speed_log_path"
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_profile}" "null"  # sed profile_id as null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo "$cmd"
                # Epoch seconds: subtracting formatted YYYYmmddHHMMSS stamps gives
                # wrong durations whenever a minute or hour boundary is crossed.
                job_bt=$(date '+%s')
                eval "$cmd"
                job_et=$(date '+%s')
                export model_run_time=$((job_et - job_bt))
                eval "cat ${log_path}/${log_name}"

                # parser log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
                # NOTE(review): this branch reports samples/s while the multi-process
                # branch reports images/s -- confirm which unit is intended.
                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
                        --model_name ${_model_name} \
                        --base_batch_size ${batch_size} \
                        --run_mode ${run_mode} \
                        --run_process_type ${run_process_type} \
                        --fp_item ${precision} \
                        --keyword ips: \
                        --skip_steps 2 \
                        --device_num ${device_num} \
                        --speed_unit samples/s \
                        --convergence_key loss: "
                echo "$cmd"
                eval "$cmd"
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            else
                IFS=";"
                # Must run in the current shell: inside a command substitution the
                # unset only affects a subshell and the variable stays set here.
                unset CUDA_VISIBLE_DEVICES
                run_process_type="MultiP"
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p "$log_path"
                mkdir -p "$speed_log_path"
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id 
                func_sed_params "$FILENAME" "${line_profile}" "null"  # sed --profile_option as null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo "$cmd"
                # Epoch seconds, for the same reason as in the single-process branch.
                job_bt=$(date '+%s')
                eval "$cmd"
                job_et=$(date '+%s')
                export model_run_time=$((job_et - job_bt))
                eval "cat ${log_path}/${log_name}"
                # parser log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"

                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
                        --model_name ${_model_name} \
                        --base_batch_size ${batch_size} \
                        --run_mode ${run_mode} \
                        --run_process_type ${run_process_type} \
                        --fp_item ${precision} \
                        --keyword ips: \
                        --skip_steps 2 \
                        --device_num ${device_num} \
                        --speed_unit images/s \
                        --convergence_key loss: "
                echo "$cmd"
                eval "$cmd"
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            fi
        done
    done
done