"git@developer.sourcefind.cn:zhiAn123/internlm2-math-7b.git" did not exist on "56b9ed08c89d8280e4df3aa3822a3bae0dac3256"
benchmark_train.sh 9 KB
Newer Older
1
2
3
#!/bin/bash
source test_tipc/common_func.sh

LDOUBLEV's avatar
LDOUBLEV committed
4
5
6
7
8
9
# set env
python=python
# Second column of the paddlepaddle-gpu row printed by `pip list`.
str_tmp=$(echo $(pip list | grep paddlepaddle-gpu | awk '{print $2}'))
export str_tmp
# Same value with everything from the first ".post" onwards removed.
frame_version=${str_tmp%%.post*}
export frame_version
# Value of paddle.version.commit as printed by the interpreter chosen above.
frame_commit=$(echo $(${python} -c "import paddle;print(paddle.version.commit)"))
export frame_commit

10
11
# Run the benchmark training script.
# Usage:
#   bash test_tipc/benchmark_train.sh <config.txt> benchmark_train <params>
# or
#   bash test_tipc/benchmark_train.sh <config.txt> benchmark_train
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

# Print the second "="-separated field of a string such as "key=value".
# Arguments: $1 - string to parse
# Outputs:   the text between the first and the second "=" followed by a
#            newline; an empty line when $1 contains no "="
# The caller's IFS and variables are left untouched.
function func_parser_params(){
    local strs=$1
    local value=""
    # Only a string that contains "=" has a second field.
    if [[ "${strs}" == *"="* ]]; then
        value=${strs#*=}      # drop the first field and its "="
        value=${value%%=*}    # stop at the next "=", if any
    fi
    printf '%s\n' "${value}"
}

# Rewrite the value part of one "key:value" line of a file, in place.
# The text before the first ":" on the target line is kept as the key and the
# whole line is replaced by "<key>:<param_value>".
# Arguments:
#   $1 - path of the file to edit (modified with sed -i)
#   $2 - 1-based number of the line to rewrite
#   $3 - new value to store after the key
# Returns: the exit status of the sed substitution
# The caller's IFS is not modified.
function func_sed_params(){
    local filename=$1
    local line=$2
    local param_value=$3
    local params key new_params escaped
    params=$(sed -n "${line}p" "${filename}")
    key=${params%%:*}
    new_params="${key}:${param_value}"
    # "\", "/" and "&" are special in a sed replacement; escape them so values
    # such as paths are written literally.
    escaped=$(printf '%s' "${new_params}" | sed -e 's|[\\/&]|\\&|g')
    # sed is invoked directly (no eval), so ";" and quotes in the value are
    # passed through unchanged.
    sed -i "${line}s/.*/${escaped}/" "${filename}"
}

# Print a comma-separated list of GPU ids for a device tag.
# The first character of the tag is skipped and at most the next six
# characters are read as "<M>C<P>" (for example "N1C4" gives M=1, P=4).
# The ids printed are 0 .. (P - 1) / M, using integer division.
# NOTE(review): M and P look like node count and total card count - confirm.
# Arguments: $1 - device tag such as "N1C4"
# Outputs:   the id list on stdout, e.g. "0,1,2,3"
# Returns:   1, with a message on stderr and nothing on stdout, when the tag
#            cannot be parsed
function set_gpu_id(){
    local string=$1
    local _str=${string:1:6}
    local M P gpu_num
    M=${_str%%C*}
    P=${_str#*C}
    P=${P%%C*}
    if [[ "${_str}" != *C* || ! "${M}" =~ ^[0-9]+$ || ! "${P}" =~ ^[0-9]+$ ]] \
        || (( 10#${M} == 0 )); then
        echo "set_gpu_id: cannot parse device tag '${string}'" >&2
        return 1
    fi
    # 10# forces base 10 so a leading zero is not read as octal.
    gpu_num=$(( (10#${P} - 1) / 10#${M} ))
    seq -s "," 0 "${gpu_num}"
}

LDOUBLEV's avatar
LDOUBLEV committed
53
54
55
56
57
58
59
# Print the last path component of the current working directory.
# Outputs: the directory name followed by a newline (an empty line when the
#          working directory is "/")
# The caller's IFS and variables are left untouched.
function get_repo_name(){
    local cur_dir
    cur_dir=$(pwd)
    printf '%s\n' "${cur_dir##*/}"
}
60

LDOUBLEV's avatar
LDOUBLEV committed
61
FILENAME=$1
if [[ -z "${FILENAME}" ]]; then
    echo "Usage: bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]" >&2
    exit 1
fi
# Copy the config and work on the copy: the rest of the script rewrites lines
# of "$FILENAME" in place, and those edits must not reach the file passed in.
new_filename="./test_tipc/benchmark_train.txt"
if [[ ! "${FILENAME}" -ef "${new_filename}" ]]; then
    # Stop here if the copy fails; continuing would read and edit whatever a
    # previous run left at ${new_filename}.
    if ! cp -- "${FILENAME}" "${new_filename}"; then
        echo "cannot copy ${FILENAME} to ${new_filename}" >&2
        exit 1
    fi
fi
FILENAME=$new_filename
# MODE must be one of ['benchmark_train']
MODE=$2
PARAMS=$3
# Example:
# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt  benchmark_train dynamic_bs8_null_DP_N1C1
IFS=$'\n'
LDOUBLEV's avatar
LDOUBLEV committed
71
72
73
74
75
76
77
78
# parser params from train_benchmark.txt
dataline=$(cat "${FILENAME}")
# Load the file one array element per physical line (blank lines included) so
# that array indices stay aligned with the line numbers reported by grep -n.
IFS=$'\n'
mapfile -t lines < "${FILENAME}"
model_name=$(func_parser_value "${lines[1]}")

# Get the line number of the "train_benchmark_params" marker line.
line_num=$(grep -n -m 1 "train_benchmark_params" "${FILENAME}" | cut -d ":" -f 1)
if [[ -z "${line_num}" ]]; then
    echo "train_benchmark_params not found in ${FILENAME}" >&2
    exit 1
fi
# grep -n counts from 1 and the array from 0, so lines[line_num] is the line
# that immediately follows the marker.
# for train log parser
batch_size=$(func_parser_value "${lines[line_num]}")
line_num=$((line_num + 1))
fp_items=$(func_parser_value "${lines[line_num]}")
line_num=$((line_num + 1))
epoch=$(func_parser_value "${lines[line_num]}")

line_num=$((line_num + 1))
profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"

line_num=$((line_num + 1))
flags_value=$(func_parser_value "${lines[line_num]}")
# set flags: flags_value is a ";"-separated list of NAME=value assignments
IFS=";"
flags_list=(${flags_value})
for _flag in "${flags_list[@]}"; do
    # Skip empty entries (e.g. from ";;"); a bare "export" would dump the
    # whole environment.
    [[ -n "${_flag}" ]] || continue
    # NOTE(review): eval runs text taken from the config file; only use
    # config files you trust.
    eval "export ${_flag}"
done

LDOUBLEV's avatar
LDOUBLEV committed
102
103
104
105
106
# set log_name
repo_name=$(get_repo_name)
# Logs are rooted at $BENCHMARK_LOG_DIR when it is set and non-empty,
# otherwise at the current directory.   # */benchmark_log
if [ -n "${BENCHMARK_LOG_DIR}" ]; then
    SAVE_LOG=${BENCHMARK_LOG_DIR}
else
    SAVE_LOG=$(pwd)
fi
status_log="${SAVE_LOG}/benchmark_log/results.log"
mkdir -p "${SAVE_LOG}/benchmark_log/"
LDOUBLEV's avatar
set env  
LDOUBLEV committed
107

LDOUBLEV's avatar
LDOUBLEV committed
108
109
110
111
112
113
114
115
116
# 1-based line numbers, inside the config file, of the entries whose values
# this script rewrites with sed.
# Rewritten once, before the benchmark loop:
line_python=3
line_eval_py=24
line_export_py=30
# Rewritten inside the benchmark loop:
line_gpuid=4
line_precision=6
line_epoch=7
line_batchsize=9
line_profile=13
LDOUBLEV's avatar
set env  
LDOUBLEV committed
117

LDOUBLEV's avatar
LDOUBLEV committed
118
119
120
# Write "null" into the eval_py and export_py entries of the config copy,
# then store the chosen python command in the python entry.
for _null_line in "${line_eval_py}" "${line_export_py}"; do
    func_sed_params "$FILENAME" "${_null_line}" "null"
done
func_sed_params "$FILENAME" "${line_python}"  "$python"
LDOUBLEV's avatar
LDOUBLEV committed
121

LDOUBLEV's avatar
LDOUBLEV committed
122
123
124
125
126
127
128
129
# Build the lists of batch sizes, precisions and device tags to benchmark.
if [[ -z "${PARAMS}" ]]; then
    # No PARAMS argument: use every "|"-separated value read from the config
    # file and the default device tag.
    IFS="|"
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
else
    # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    params_list=(${PARAMS})
    IFS=";"
    model_type=${params_list[0]}
    # Keep only the digits of the batch-size field, e.g. "bs8" -> "8".
    batch_size=${params_list[1]//[!0-9]/}
    precision=${params_list[2]}
    # run_process_type=${params_list[3]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}

    # Reject a PARAMS string with missing fields instead of silently running
    # nothing.
    if [[ -z "${batch_size}" || -z "${precision}" || -z "${run_mode}" || -z "${device_num}" ]]; then
        echo "PARAMS must look like modeltype_bs<N>_<fp_item>_<run_mode>_<device_num>, got '${PARAMS}'" >&2
        exit 1
    fi

    if [[ "${precision}" == "null" ]]; then
        precision="fp32"
    fi

    fp_items_list=("${precision}")
    batch_size_list=("${batch_size}")
    device_num_list=("${device_num}")
fi
151

LDOUBLEV's avatar
LDOUBLEV committed
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# Run the benchmark for every (batch size, precision, device tag) combination.
# A combination that resolves to a single GPU id first gets an extra run with
# the profile option enabled; every combination then gets a timed run whose
# log is handed to analysis.py.
IFS="|"
for batch_size in "${batch_size_list[@]}"; do
    for precision in "${fp_items_list[@]}"; do
        for device_num in "${device_num_list[@]}"; do
            # sed batchsize and precision
            func_sed_params "$FILENAME" "${line_precision}" "$precision"
            func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
            func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
            gpu_id=$(set_gpu_id "$device_num")
            # The helpers called below have always run with IFS=";"; set it
            # here so that does not depend on a side effect of another call.
            IFS=";"

            if (( ${#gpu_id} <= 1 )); then
                run_process_type="SingleP"
                # NOTE(review): this path reports samples/s while the
                # multi-process path reports images/s - confirm that the
                # difference is intended.
                speed_unit="samples/s"
                log_path="$SAVE_LOG/profiling_log"
                mkdir -p "$log_path"
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
                # set profile_option params (the whole line, key included)
                sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"

                # run test_train_inference_python.sh
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo "$cmd"
                bash test_tipc/test_train_inference_python.sh "${FILENAME}" benchmark_train > "${log_path}/${log_name}" 2>&1
                cat "${log_path}/${log_name}"
            else
                # Must run in this shell: inside a command substitution the
                # unset would only affect a subshell.
                unset CUDA_VISIBLE_DEVICES
                run_process_type="MultiP"
                speed_unit="images/s"
                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id
            fi

            # without profile
            log_path="$SAVE_LOG/train_log"
            speed_log_path="$SAVE_LOG/index"
            mkdir -p "$log_path"
            mkdir -p "$speed_log_path"
            log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
            speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
            func_sed_params "$FILENAME" "${line_profile}" "null"  # sed profile_id as null
            cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
            echo "$cmd"
            # Epoch seconds, so the difference below is a real duration;
            # subtracting %Y%m%d%H%M%S stamps is wrong whenever a minute,
            # hour or day boundary is crossed.
            job_bt=$(date '+%s')
            bash test_tipc/test_train_inference_python.sh "${FILENAME}" benchmark_train > "${log_path}/${log_name}" 2>&1
            job_et=$(date '+%s')
            export model_run_time=$((job_et - job_bt))
            cat "${log_path}/${log_name}"

            # parser log
            _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
            cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                    --speed_log_file '${speed_log_path}/${speed_log_name}' \
                    --model_name ${_model_name} \
                    --base_batch_size ${batch_size} \
                    --run_mode ${run_mode} \
                    --run_process_type ${run_process_type} \
                    --fp_item ${precision} \
                    --keyword ips: \
                    --skip_steps 2 \
                    --device_num ${device_num} \
                    --speed_unit ${speed_unit} \
                    --convergence_key loss: "
            echo "$cmd"
            eval "$cmd"
            last_status=$?
            status_check "$last_status" "${cmd}" "${status_log}"
        done
    done
done