Merge remote-tracking branch 'origin/dygraph' into dygraph

84ce34bd · Leif · f470ede8 · 529133fb · 84ce34bd · 84ce34bd
Commit 84ce34bd authored Oct 29, 2021 by Leif
9 changed files
--- a/PTDN/results/cpp_ppocr_det_mobile_results_fp32.txt
+++ b/PTDN/results/cpp_ppocr_det_mobile_results_fp32.txt
--- a/PTDN/results/python_ppocr_det_mobile_results_fp16.txt
+++ b/PTDN/results/python_ppocr_det_mobile_results_fp16.txt
--- a/PTDN/results/python_ppocr_det_mobile_results_fp32.txt
+++ b/PTDN/results/python_ppocr_det_mobile_results_fp32.txt
--- a/PTDN/test_inference_cpp.sh
+++ b/PTDN/test_inference_cpp.sh
 #!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh

 FILENAME=$1
 dataline=$(awk 'NR==52, NR==66{print}'  $FILENAME)
@@ -35,7 +35,7 @@ cpp_benchmark_key=$(func_parser_key "${lines[14]}")
 cpp_benchmark_value=$(func_parser_value "${lines[14]}")


-LOG_PATH="./tests/output"
+LOG_PATH="./test_tipc/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results_cpp.log"


--- a/test_tipc/test_lite.sh
+++ b/test_tipc/test_lite.sh
+#!/bin/bash
+# Reads lite-inference settings from a parameter file and prepares the log
+# directory used by the runs further down.
+# Usage: bash test_lite.sh <params_file>
+# NOTE(review): common_func.sh is sourced relative to the current directory,
+# so this presumably has to be launched from the directory holding it — confirm.
+source ./common_func.sh
+# Prepend the current directory to the shared-library search path.
+export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
+
+# $1: path of the parameter file.
+FILENAME=$1
+# Keep only lines 101 through 110 of the parameter file.
+dataline=$(awk 'NR==101, NR==110{print}'  $FILENAME)
+echo $dataline
+# parser params
+# Split the captured text on newlines so each file line becomes one array entry.
+IFS=$'\n'
+lines=(${dataline})
+
+# parser lite inference
+# lines[0] is not read here (presumably a section header — confirm).
+# func_parser_value is not defined in this script; presumably it comes from
+# common_func.sh and returns the value part of a "key:value" line — verify.
+lite_inference_cmd=$(func_parser_value "${lines[1]}")
+lite_model_dir_list=$(func_parser_value "${lines[2]}")
+lite_cpu_threads_list=$(func_parser_value "${lines[3]}")
+lite_batch_size_list=$(func_parser_value "${lines[4]}")
+lite_power_mode_list=$(func_parser_value "${lines[5]}")
+lite_infer_img_dir_list=$(func_parser_value "${lines[6]}")
+lite_config_dir=$(func_parser_value "${lines[7]}")
+lite_rec_dict_dir=$(func_parser_value "${lines[8]}")
+lite_benchmark_value=$(func_parser_value "${lines[9]}")
+
+# Per-run logs and the aggregated status log both live under ./output.
+LOG_PATH="./output"
+mkdir -p ${LOG_PATH}
+status_log="${LOG_PATH}/results.log"
+
+
+# Runs the lite inference command once for every combination of thread count,
+# power mode and batch size, writing each run's output to its own log file and
+# reporting the exit status through status_check.
+# Globals read:  lite_cpu_threads_list, lite_power_mode_list,
+#                lite_batch_size_list, lite_benchmark_value, status_log
+# Globals set:   IFS (left as '|' so the '|'-separated lists split correctly)
+# Arguments:
+#   $1 - inference command to run
+#   $2 - model path; a path containing "slim" is labelled INT8, otherwise FP32
+#   $3 - directory that receives the per-run log files
+#   $4 - image file or image directory passed to the command
+#   $5 - config file passed to the command
+function func_lite(){
+    IFS='|'
+    local _script=$1
+    local _lite_model=$2
+    local _log_path=$3
+    local _img_dir=$4
+    local _config=$5
+    local precision single_img is_single_img model_name
+    local num_threads power_mode batchsize _save_log_path command
+
+    # Use the model passed as $2 rather than relying on the caller's loop variable.
+    if [[ ${_lite_model} =~ "slim" ]]; then
+        precision="INT8"
+    else
+        precision="FP32"
+    fi
+    # The dots are escaped so only a real extension counts; an unescaped '.'
+    # matches any character and would flag a directory such as "./jpg_images".
+    is_single_img=$(echo "${_img_dir}" | grep -E '\.(jpg|jpeg|png|JPEG|JPG)')
+    if [[ -n "${is_single_img}" ]]; then
+        single_img="True"
+    else
+        single_img="False"
+    fi
+    # Last path component of the model; loop-invariant, so computed once.
+    model_name=${_lite_model##*/}
+
+    # lite inference
+    for num_threads in ${lite_cpu_threads_list[*]}; do
+        for power_mode in ${lite_power_mode_list[*]}; do
+            for batchsize in ${lite_batch_size_list[*]}; do
+                _save_log_path="${_log_path}/lite_${model_name}_precision_${precision}_batchsize_${batchsize}_threads_${num_threads}_powermode_${power_mode}_singleimg_${single_img}.log"
+                command="${_script} ${_lite_model} ${precision} ${num_threads} ${batchsize} ${power_mode} ${_img_dir} ${_config} ${lite_benchmark_value} > ${_save_log_path} 2>&1"
+                # NOTE: eval runs text taken from the parameter file; only use trusted configs.
+                # Quoted so the active IFS ('|') cannot split the command string.
+                eval "${command}"
+                status_check $? "${command}" "${status_log}"
+            done
+        done
+    done
+}
+
+
+echo "################### run test ###################"
+# The model and image-directory lists are '|'-separated, so split on '|'.
+IFS="|"
+# Call func_lite once for every (model, image dir) pair.
+for lite_model in ${lite_model_dir_list[*]}; do
+    #run lite inference
+    for img_dir in ${lite_infer_img_dir_list[*]}; do
+        func_lite "${lite_inference_cmd}" "${lite_model}" "${LOG_PATH}" "${img_dir}" "${lite_config_dir}"
+    done
+done
--- a/PTDN/test_serving.sh
+++ b/PTDN/test_serving.sh
 #!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh

 FILENAME=$1
 dataline=$(awk 'NR==67, NR==83{print}'  $FILENAME)
@@ -36,8 +36,8 @@ web_precision_key=$(func_parser_key "${lines[15]}")
 web_precision_list=$(func_parser_value "${lines[15]}")
 pipeline_py=$(func_parser_value "${lines[16]}")

-LOG_PATH="../../tests/output"
-mkdir -p ./tests/output
+LOG_PATH="../../test_tipc/output"
+mkdir -p ./test_tipc/output
 status_log="${LOG_PATH}/results_serving.log"

 function func_serving(){

--- a/PTDN/test_train_inference_python.sh
+++ b/PTDN/test_train_inference_python.sh
 #!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh

 FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'klquant_infer']
+# MODE must be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
 MODE=$2

 dataline=$(awk 'NR==1, NR==51{print}'  $FILENAME)
@@ -59,6 +59,7 @@ export_key1=$(func_parser_key "${lines[33]}")
 export_value1=$(func_parser_value "${lines[33]}")
 export_key2=$(func_parser_key "${lines[34]}")
 export_value2=$(func_parser_value "${lines[34]}")
+inference_dir=$(func_parser_value "${lines[35]}")

 # parser inference model 
 infer_model_dir_list=$(func_parser_value "${lines[36]}")
@@ -88,7 +89,7 @@ infer_key1=$(func_parser_key "${lines[50]}")
 infer_value1=$(func_parser_value "${lines[50]}")

 # parser klquant_infer
-if [ ${MODE} = "klquant_infer" ]; then
+if [ ${MODE} = "klquant_whole_infer" ]; then
    dataline=$(awk 'NR==82, NR==98{print}'  $FILENAME)
    lines=(${dataline})
    # parser inference model 
@@ -119,7 +120,7 @@ if [ ${MODE} = "klquant_infer" ]; then
    infer_value1=$(func_parser_value "${lines[15]}")
 fi

-LOG_PATH="./tests/output"
+LOG_PATH="./test_tipc/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results_python.log"

@@ -202,7 +203,7 @@ function func_inference(){
    done
 }

-if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
+if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
        env=" "
@@ -245,6 +246,7 @@ else
    for gpu in ${gpu_list[*]}; do
        use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
+        ips=""
        if [ ${gpu} = "-1" ];then
            env=""
        elif [ ${#gpu} -le 1 ];then
@@ -264,6 +266,11 @@ else
            env=" "
        fi
        for autocast in ${autocast_list[*]}; do 
+            if [ ${autocast} = "amp" ]; then
+                set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
+            else
+                set_amp_config=" "
+            fi          
            for trainer in ${trainer_list[*]}; do 
                flag_quant=False
                if [ ${trainer} = ${pact_key} ]; then
@@ -290,7 +297,6 @@ else
                if [ ${run_train} = "null" ]; then
                    continue
                fi
-                
                set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
                set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
@@ -306,11 +312,11 @@ else

                set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
-                    cmd="${python} ${run_train} ${set_use_gpu}  ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
-                elif [ ${#gpu} -le 15 ];then  # train with multi-gpu
-                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+                    cmd="${python} ${run_train} ${set_use_gpu}  ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
+                elif [ ${#ips} -le 26 ];then  # train with multi-gpu
+                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                else     # train with multi-machine
-                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${set_use_gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                fi
                # run train
                eval "unset CUDA_VISIBLE_DEVICES"
@@ -342,7 +348,13 @@ else
                    #run inference
                    eval $env
                    save_infer_path="${save_log}"
-                    func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
+                    if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
+                        infer_model_dir="${save_infer_path}/${inference_dir}"
+                    else
+                        infer_model_dir=${save_infer_path}
+                    fi
+                    func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
+                    
                    eval "unset CUDA_VISIBLE_DEVICES"
                fi
            done  # done with:    for trainer in ${trainer_list[*]}; do 

--- a/tools/program.py
+++ b/tools/program.py
@@ -159,7 +159,8 @@ def train(config,
          eval_class,
          pre_best_model_dict,
          logger,
-          vdl_writer=None):
+          vdl_writer=None,
+          scaler=None):
    cal_metric_during_train = config['Global'].get('cal_metric_during_train',
                                                   False)
    log_smooth_window = config['Global']['log_smooth_window']
@@ -211,33 +212,49 @@ def train(config,
    for epoch in range(start_epoch, epoch_num + 1):
        train_dataloader = build_dataloader(
            config, 'Train', device, logger, seed=epoch)
-        train_batch_cost = 0.0
        train_reader_cost = 0.0
-        batch_sum = 0
-        batch_start = time.time()
+        train_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
        max_iter = len(train_dataloader) - 1 if platform.system(
        ) == "Windows" else len(train_dataloader)
        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
-            train_reader_cost += time.time() - batch_start
+            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True
-            if model_type == 'table' or extra_input:
-                preds = model(images, data=batch[1:])
+
+            train_start = time.time()
+            # use amp
+            if scaler:
+                with paddle.amp.auto_cast():
+                    if model_type == 'table' or extra_input:
+                        preds = model(images, data=batch[1:])
+                    else:
+                        preds = model(images)
            else:
-                preds = model(images)
+                if model_type == 'table' or extra_input:
+                    preds = model(images, data=batch[1:])
+                else:
+                    preds = model(images)
            loss = loss_class(preds, batch)
            avg_loss = loss['loss']
-            avg_loss.backward()
-            optimizer.step()
+
+            if scaler:
+                scaled_avg_loss = scaler.scale(avg_loss)
+                scaled_avg_loss.backward()
+                scaler.minimize(optimizer, scaled_avg_loss)
+            else:
+                avg_loss.backward()
+                optimizer.step()
            optimizer.clear_grad()

-            train_batch_cost += time.time() - batch_start
-            batch_sum += len(images)
+            train_run_cost += time.time() - train_start
+            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()
@@ -268,12 +285,13 @@ def train(config,
                logs = train_stats.log()
                strs = 'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'.format(
                    epoch, epoch_num, global_step, logs, train_reader_cost /
-                    print_batch_step, train_batch_cost / print_batch_step,
-                    batch_sum, batch_sum / train_batch_cost)
+                    print_batch_step, (train_reader_cost + train_run_cost) /
+                    print_batch_step, total_samples,
+                    total_samples / (train_reader_cost + train_run_cost))
                logger.info(strs)
-                train_batch_cost = 0.0
                train_reader_cost = 0.0
-                batch_sum = 0
+                train_run_cost = 0.0
+                total_samples = 0
            # eval
            if global_step > start_eval_step and \
                    (global_step - start_eval_step) % eval_batch_step == 0 and dist.get_rank() == 0:
@@ -326,7 +344,7 @@ def train(config,
                                          global_step)
            global_step += 1
            optimizer.clear_grad()
-            batch_start = time.time()
+            reader_start = time.time()
        if dist.get_rank() == 0:
            save_model(
                model,
@@ -367,7 +385,11 @@ def eval(model,
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
-        pbar = tqdm(total=len(valid_dataloader), desc='eval model:')
+        pbar = tqdm(
+            total=len(valid_dataloader),
+            desc='eval model:',
+            position=0,
+            leave=True)
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        for idx, batch in enumerate(valid_dataloader):
@@ -436,8 +458,6 @@ def get_center(model, eval_dataloader, post_process_class):

        batch = [item.numpy() for item in batch]
        # Obtain usable results from post-processing methods
-        total_time += time.time() - start
-        # Evaluate the results of the current batch
        post_result = post_process_class(preds, batch[1])

        #update char_center
@@ -480,11 +500,6 @@ def preprocess(is_train=False):
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED'
    ]
-    windows_not_support_list = ['PSE']
-    if platform.system() == "Windows" and alg in windows_not_support_list:
-        logger.warning('{} is not support in Windows now'.format(
-            windows_not_support_list))
-        sys.exit()

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
    device = paddle.set_device(device)

--- a/tools/train.py
+++ b/tools/train.py
@@ -102,10 +102,27 @@ def main(config, device, logger, vdl_writer):
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(
            len(valid_dataloader)))
+
+    use_amp = config["Global"].get("use_amp", False)
+    if use_amp:
+        AMP_RELATED_FLAGS_SETTING = {
+            'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
+            'FLAGS_max_inplace_grad_add': 8,
+        }
+        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
+        scale_loss = config["Global"].get("scale_loss", 1.0)
+        use_dynamic_loss_scaling = config["Global"].get(
+            "use_dynamic_loss_scaling", False)
+        scaler = paddle.amp.GradScaler(
+            init_loss_scaling=scale_loss,
+            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
+    else:
+        scaler = None
+
    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
-                  eval_class, pre_best_model_dict, logger, vdl_writer)
+                  eval_class, pre_best_model_dict, logger, vdl_writer, scaler)


 def test_reader(config, device, logger):