add new model

764b3a75 · Sugon_ldc · 764b3a75 · 764b3a75 · 764b3a75 · 764b3a75
Commit 764b3a75 authored Jun 07, 2023 by Sugon_ldc
20 changed files
--- a/examples/aishell/s0/conf/train_u2++_efficonformer_v1.yaml
+++ b/examples/aishell/s0/conf/train_u2++_efficonformer_v1.yaml
+# network architecture
+# encoder related
+encoder: efficientConformer
+encoder_conf:
+    activation_type: 'swish'
+    attention_heads: 8
+    causal: false
+    cnn_module_kernel: 15
+    cnn_module_norm: 'layer_norm'
+    dropout_rate: 0.1
+    input_layer: conv2d
+    linear_units: 2048
+    normalize_before: true
+    num_blocks: 12
+    output_size: 256
+    pos_enc_layer_type: 'rel_pos'
+    attention_dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    use_cnn_module: true
+    use_dynamic_chunk: true
+    use_dynamic_left_chunk: false
+    efficient_conf:
+        stride_layer_idx: [3]           # layer id with StrideConv
+        stride: [2]                     # stride size of each StrideConv
+        group_layer_idx: [0, 1, 2, 3]   # layer id with GroupedAttention
+        group_size: 3                   # group size of every GroupedAttention layer
+        stride_kernel: true             # true: recompute cnn kernels with stride
+
+# decoder related
+decoder: bitransformer
+decoder_conf:
+    attention_heads: 8
+    dropout_rate: 0.1
+    linear_units: 2048
+    num_blocks: 3
+    r_num_blocks: 3
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+    reverse_weight: 0.3
+
+# dataset related
+dataset_conf:
+    batch_conf:
+        batch_size: 16
+        batch_type: 'static'
+    fbank_conf:
+        num_mel_bins: 80
+        frame_shift: 10
+        frame_length: 25
+        dither: 1.0
+    filter_conf:
+        max_length: 40960
+        min_length: 0
+        token_max_length: 200
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500
+    spec_aug: true
+    spec_aug_conf:
+        num_t_mask: 2
+        num_f_mask: 2
+        max_t: 50
+        max_f: 10
+    spec_sub: true
+    spec_sub_conf:
+        num_t_sub: 3
+        max_t: 30
+    spec_trim: false
+    spec_trim_conf:
+        max_t: 50
+    speed_perturb: true
+
+grad_clip: 5
+accum_grad: 1
+max_epoch: 200
+log_interval: 100
+
+optim: adam
+optim_conf:
+    lr: 0.001
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 25000
--- a/examples/aishell/s0/conf/train_u2++_efficonformer_v2.yaml
+++ b/examples/aishell/s0/conf/train_u2++_efficonformer_v2.yaml
+# network architecture
+# encoder related
+encoder: efficientConformer
+encoder_conf:
+    activation_type: 'swish'
+    attention_heads: 8
+    causal: false
+    cnn_module_kernel: 15
+    cnn_module_norm: 'layer_norm'
+    dropout_rate: 0.1
+    input_layer: conv2d2
+    linear_units: 2048
+    normalize_before: true
+    num_blocks: 12
+    output_size: 256
+    pos_enc_layer_type: 'rel_pos'
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    use_cnn_module: true
+    use_dynamic_chunk: true
+    use_dynamic_left_chunk: false
+    efficient_conf:
+        stride_layer_idx: [3, 7]           # layer id with StrideConv
+        stride: [2, 2]                     # stride size of each StrideConv
+        group_layer_idx: [3, 7]            # layer id with GroupedAttention
+        group_size: 3                      # group size of every GroupedAttention layer
+        stride_kernel: false               # true: recompute cnn kernels with stride
+
+# decoder related
+decoder: bitransformer
+decoder_conf:
+    attention_heads: 8
+    dropout_rate: 0.1
+    linear_units: 2048
+    num_blocks: 3
+    positional_dropout_rate: 0.1
+    r_num_blocks: 3
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+    reverse_weight: 0.3
+
+# dataset related
+dataset_conf:
+    batch_conf:
+        batch_size: 16
+        batch_type: 'static'
+    fbank_conf:
+        dither: 1.0
+        frame_length: 25
+        frame_shift: 10
+        num_mel_bins: 80
+    filter_conf:
+        max_length: 40960
+        min_length: 0
+        max_output_input_ratio: 0.1
+        min_output_input_ratio: 0.005
+        token_max_length: 200
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500
+    spec_aug: true
+    spec_aug_conf:
+        num_t_mask: 2
+        num_f_mask: 2
+        max_t: 50
+        max_f: 10
+    spec_sub: true
+    spec_sub_conf:
+        num_t_sub: 3
+        max_t: 30
+    spec_trim: false
+    spec_trim_conf:
+        max_t: 50
+    speed_perturb: true
+
+grad_clip: 5
+accum_grad: 1
+max_epoch: 200
+log_interval: 100
+
+optim: adam
+optim_conf:
+    lr: 0.001
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 25000
--- a/examples/aishell/s0/conf/train_u2++_transformer.yaml
+++ b/examples/aishell/s0/conf/train_u2++_transformer.yaml
+# network architecture
+# encoder related
+encoder: transformer
+encoder_conf:
+    output_size: 256    # dimension of attention
+    attention_heads: 4
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.0
+    input_layer: conv2d # encoder architecture type
+    normalize_before: true
+    use_dynamic_chunk: true
+    use_dynamic_left_chunk: false
+
+# decoder related
+decoder: bitransformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 3
+    r_num_blocks: 3
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+    reverse_weight: 0.3
+
+dataset_conf:
+    filter_conf:
+        max_length: 40960
+        min_length: 0
+        token_max_length: 200
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000
+    speed_perturb: true
+    fbank_conf:
+        num_mel_bins: 80
+        frame_shift: 10
+        frame_length: 25
+        dither: 1.0
+    spec_aug: true
+    spec_aug_conf:
+        num_t_mask: 2
+        num_f_mask: 2
+        max_t: 50
+        max_f: 10
+    spec_trim: false
+    spec_trim_conf:
+        max_t: 70
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500  # sort_size should be less than shuffle_size
+    batch_conf:
+        batch_type: 'static' # static or dynamic
+        batch_size: 26
+
+grad_clip: 5
+accum_grad: 1
+max_epoch: 360
+log_interval: 100
+
+optim: adam
+optim_conf:
+    lr: 0.001
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 25000
--- a/examples/aishell/s0/conf/train_unified_conformer.yaml
+++ b/examples/aishell/s0/conf/train_unified_conformer.yaml
+# network architecture
+# encoder related
+encoder: conformer
+# Encoder hyper-parameters for the unified (dynamic chunk) causal conformer.
+encoder_conf:
+    output_size: 256    # dimension of attention
+    attention_heads: 4
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.0
+    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
+    normalize_before: true
+    cnn_module_kernel: 15
+    use_cnn_module: true
+    activation_type: 'swish'
+    pos_enc_layer_type: 'rel_pos'
+    selfattention_layer_type: 'rel_selfattn'
+    causal: true
+    use_dynamic_chunk: true
+    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
+    use_dynamic_left_chunk: false
+
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+
+dataset_conf:
+    filter_conf:
+        max_length: 40960
+        min_length: 0
+        token_max_length: 200
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000
+    speed_perturb: true
+    fbank_conf:
+        num_mel_bins: 80
+        frame_shift: 10
+        frame_length: 25
+        dither: 0.1
+    spec_aug: true
+    spec_aug_conf:
+        num_t_mask: 2
+        num_f_mask: 2
+        max_t: 50
+        max_f: 10
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500  # sort_size should be less than shuffle_size
+    batch_conf:
+        batch_type: 'static' # static or dynamic
+        batch_size: 16
+
+grad_clip: 5
+accum_grad: 1
+max_epoch: 180
+log_interval: 100
+
+optim: adam
+optim_conf:
+    lr: 0.001
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 25000
--- a/examples/aishell/s0/conf/train_unified_transformer.yaml
+++ b/examples/aishell/s0/conf/train_unified_transformer.yaml
+# network architecture
+# encoder related
+encoder: transformer
+encoder_conf:
+    output_size: 256    # dimension of attention
+    attention_heads: 4
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.0
+    input_layer: conv2d # encoder architecture type
+    normalize_before: true
+    use_dynamic_chunk: true
+    use_dynamic_left_chunk: false
+
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+
+dataset_conf:
+    filter_conf:
+        max_length: 40960
+        min_length: 0
+        token_max_length: 200
+        token_min_length: 1
+    resample_conf:
+        resample_rate: 16000
+    speed_perturb: true
+    fbank_conf:
+        num_mel_bins: 80
+        frame_shift: 10
+        frame_length: 25
+        dither: 0.1
+    spec_aug: true
+    spec_aug_conf:
+        num_t_mask: 2
+        num_f_mask: 2
+        max_t: 50
+        max_f: 10
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 1500
+    sort: true
+    sort_conf:
+        sort_size: 500  # sort_size should be less than shuffle_size
+    batch_conf:
+        batch_type: 'static' # static or dynamic
+        batch_size: 16
+
+
+grad_clip: 5
+accum_grad: 1
+max_epoch: 180
+log_interval: 100
+
+optim: adam
+optim_conf:
+    lr: 0.002
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 25000
--- a/examples/aishell/s0/local/aishell_data_prep.sh
+++ b/examples/aishell/s0/local/aishell_data_prep.sh
+#!/bin/bash
+
+# Copyright 2017 Xingyu Na
+# Apache 2.0
+
+. ./path.sh || exit 1;
+
+if [ $# != 2 ]; then
+  echo "Usage: $0 <audio-path> <text-path>"
+  echo " $0 /export/a05/xna/data/data_aishell/wav /export/a05/xna/data/data_aishell/transcript"
+  exit 1;
+fi
+
+aishell_audio_dir=$1
+aishell_text=$2/aishell_transcript_v0.8.txt
+
+train_dir=data/local/train
+dev_dir=data/local/dev
+test_dir=data/local/test
+tmp_dir=data/local/tmp
+
+mkdir -p $train_dir
+mkdir -p $dev_dir
+mkdir -p $test_dir
+mkdir -p $tmp_dir
+
+# data directory check
+if [ ! -d $aishell_audio_dir ] || [ ! -f $aishell_text ]; then
+  echo "Error: $0 requires two directory arguments"
+  exit 1;
+fi
+
+# Collect every wav file under the audio directory; the train, dev and test
+# lists are filtered out of this file list afterwards.
+find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist
+n=`cat $tmp_dir/wav.flist | wc -l`
+# A count mismatch is only reported as a warning; the script keeps going.
+[ $n -ne 141925 ] && \
+  echo Warning: expected 141925 data files, found $n
+
+grep -i "wav/train" $tmp_dir/wav.flist > $train_dir/wav.flist || exit 1;
+grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1;
+grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1;
+
+rm -r $tmp_dir
+
+# Transcriptions preparation
+for dir in $train_dir $dev_dir $test_dir; do
+  echo Preparing $dir transcriptions
+  sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list
+  paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all
+  tools/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt
+  awk '{print $1}' $dir/transcripts.txt > $dir/utt.list
+  tools/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp
+  sort -u $dir/transcripts.txt > $dir/text
+done
+
+mkdir -p data/train data/dev data/test
+
+for f in wav.scp text; do
+  cp $train_dir/$f data/train/$f || exit 1;
+  cp $dev_dir/$f data/dev/$f || exit 1;
+  cp $test_dir/$f data/test/$f || exit 1;
+done
+
+echo "$0: AISHELL data preparation succeeded"
+exit 0;
--- a/examples/aishell/s0/local/aishell_train_lms.sh
+++ b/examples/aishell/s0/local/aishell_train_lms.sh
+#!/bin/bash
+
+
+# To be run from one directory above this script.
+. ./path.sh
+
+text=data/local/lm/text
+lexicon=data/local/dict/lexicon.txt
+
+# Fail early if either input file is missing. The test must use the loop
+# variable $f: with an unset variable the expression collapses to `[ ! -f ]`,
+# which is always false, so a missing file would never be reported.
+for f in "$text" "$lexicon"; do
+  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
+done
+
+# Check SRILM tools
+if ! which ngram-count > /dev/null; then
+    echo "srilm tools are not found, please download it and install it from: "
+    echo "http://www.speech.sri.com/projects/srilm/download.html"
+    echo "Then add the tools to your PATH"
+    exit 1
+fi
+
+# This script takes no arguments.  It assumes you have already run
+# aishell_data_prep.sh.
+# It takes as input the files
+# data/local/lm/text
+# data/local/dict/lexicon.txt
+dir=data/local/lm
+mkdir -p $dir
+
+
+cleantext=$dir/text.no_oov
+
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
+  {for(n=1; n<=NF;n++) {  if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
+  > $cleantext || exit 1;
+
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+   sort -nr > $dir/word.counts || exit 1;
+
+# Get counts from acoustic training transcripts, and add  one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+   sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
+
+cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
+
+heldout_sent=10000 # Don't change this if you want result to be comparable with
+    # kaldi_lm results
+mkdir -p $dir
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  head -$heldout_sent > $dir/heldout
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  tail -n +$heldout_sent > $dir/train
+
+ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
+  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
+ngram -lm $dir/lm.arpa -ppl $dir/heldout
--- a/examples/aishell/s0/local/download_and_untar.sh
+++ b/examples/aishell/s0/local/download_and_untar.sh
+#!/bin/bash
+
+# Copyright   2014  Johns Hopkins University (author: Daniel Povey)
+#             2017  Xingyu Na
+# Apache 2.0
+
+remove_archive=false
+
+if [ "$1" == --remove-archive ]; then
+  remove_archive=true
+  shift
+fi
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [--remove-archive] <data-base> <url-base> <corpus-part>"
+  echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/33 data_aishell"
+  echo "With --remove-archive it will remove the archive after successfully un-tarring it."
+  echo "<corpus-part> can be one of: data_aishell, resource_aishell."
+fi
+
+data=$1
+url=$2
+part=$3
+
+# The download/extraction target must already exist.
+if [ ! -d "$data" ]; then
+  echo "$0: no such directory $data"
+  exit 1;
+fi
+# Resolve to an absolute path: the script later changes into $data and keeps
+# using $data/... paths afterwards, which only works when $data is absolute.
+data=$(cd "$data" && pwd) || exit 1
+
+part_ok=false
+list="data_aishell resource_aishell"
+for x in $list; do
+  if [ "$part" == $x ]; then part_ok=true; fi
+done
+if ! $part_ok; then
+  echo "$0: expected <corpus-part> to be one of $list, but got '$part'"
+  exit 1;
+fi
+
+if [ -z "$url" ]; then
+  echo "$0: empty URL base."
+  exit 1;
+fi
+
+if [ -f $data/$part/.complete ]; then
+  echo "$0: data part $part was already successfully extracted, nothing to do."
+  exit 0;
+fi
+
+# sizes of the archive files in bytes.
+sizes="15582913665 1246920"
+
+if [ -f $data/$part.tgz ]; then
+  size=$(/bin/ls -l $data/$part.tgz | awk '{print $5}')
+  size_ok=false
+  for s in $sizes; do if [ $s == $size ]; then size_ok=true; fi; done
+  if ! $size_ok; then
+    echo "$0: removing existing file $data/$part.tgz because its size in bytes $size"
+    echo "does not equal the size of one of the archives."
+    rm $data/$part.tgz
+  else
+    echo "$data/$part.tgz exists and appears to be complete."
+  fi
+fi
+
+if [ ! -f $data/$part.tgz ]; then
+  if ! which wget >/dev/null; then
+    echo "$0: wget is not installed."
+    exit 1;
+  fi
+  full_url=$url/$part.tgz
+  echo "$0: downloading data from $full_url.  This may take some time, please be patient."
+
+  cd $data
+  if ! wget --no-check-certificate $full_url; then
+    echo "$0: error executing wget $full_url"
+    exit 1;
+  fi
+fi
+
+# Unpack the main archive inside the data directory.
+cd $data
+
+if ! tar -xvzf $part.tgz; then
+  echo "$0: error un-tarring archive $data/$part.tgz"
+  exit 1;
+fi
+
+# data_aishell ships its audio as nested tar.gz files under wav/; unpack each
+# one and delete it once it has been extracted.
+if [ $part == "data_aishell" ]; then
+  cd $data/$part/wav
+  for wav in ./*.tar.gz; do
+    # An unmatched glob stays literal; skip it instead of handing it to tar.
+    [ -e "$wav" ] || continue
+    echo "Extracting wav from $wav"
+    if ! tar -zxf $wav; then
+      echo "$0: error un-tarring archive $wav"
+      exit 1;
+    fi
+    rm $wav
+  done
+fi
+
+# Write the marker only after every extraction step has succeeded, so that an
+# interrupted or failed run is retried instead of being skipped by the
+# .complete check at the top of the script.
+touch $data/$part/.complete
+
+echo "$0: Successfully downloaded and un-tarred $data/$part.tgz"
+
+if $remove_archive; then
+  echo "$0: removing $data/$part.tgz file since --remove-archive option was supplied."
+  rm $data/$part.tgz
+fi
+
+exit 0;
--- a/examples/aishell/s0/path.sh
+++ b/examples/aishell/s0/path.sh
+export WENET_DIR=$PWD/../../..
+export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
+export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
+export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH
+
+# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=../../../:$PYTHONPATH
--- a/examples/aishell/s0/run.sh
+++ b/examples/aishell/s0/run.sh
--- a/examples/aishell/s0/run_train.sh
+++ b/examples/aishell/s0/run_train.sh
--- a/examples/aishell/s0/run_validate.sh
+++ b/examples/aishell/s0/run_validate.sh
+#!/bin/bash
+
+# Copyright 2019 Mobvoi Inc. All Rights Reserved.
+. ./path.sh || exit 1;
+
+# Use this to control how many GPUs you use. It's 1-GPU training if you specify
+# just 1 GPU; otherwise it's multi-GPU training based on DDP in PyTorch.
+export CUDA_VISIBLE_DEVICES="0,1,2,3"
+# The NCCL_SOCKET_IFNAME variable specifies which IP interface to use for nccl
+# communication. More details can be found in
+# https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
+# export NCCL_SOCKET_IFNAME=ens4f1
+export NCCL_DEBUG=INFO
+stage=5 # start from 0 if you need to start from data preparation
+stop_stage=5
+
+# The num of machines(nodes) for multi-machine training, 1 is for one machine.
+# NFS is required if num_nodes > 1.
+num_nodes=1
+
+# The rank of each node or machine, which ranges from 0 to `num_nodes - 1`.
+# You should set the node_rank=0 on the first machine, set the node_rank=1
+# on the second machine, and so on.
+node_rank=0
+# The aishell dataset location; please change this to your own path.
+# Make sure to use an absolute path. DO NOT use a relative path!
+data=/data/conformer/train/
+data_url=www.openslr.org/resources/33
+
+nj=16
+dict=data/dict/lang_char.txt
+
+# data_type can be `raw` or `shard`. Typically, raw is used for small dataset,
+# `shard` is used for large dataset which is over 1k hours, and `shard` is
+# faster on reading data and training.
+data_type=raw
+num_utts_per_shard=1000
+
+train_set=train
+# Optional train_config
+# 1. conf/train_transformer.yaml: Standard transformer
+# 2. conf/train_conformer.yaml: Standard conformer
+# 3. conf/train_unified_conformer.yaml: Unified dynamic chunk causal conformer
+# 4. conf/train_unified_transformer.yaml: Unified dynamic chunk transformer
+# 5. conf/train_u2++_conformer.yaml: U2++ conformer
+# 6. conf/train_u2++_transformer.yaml: U2++ transformer
+train_config=conf/train_conformer.yaml
+cmvn=true
+dir=exp/conformer
+checkpoint=
+
+# use average_checkpoint will get better result
+average_checkpoint=false
+decode_checkpoint=$dir/final.pt
+average_num=30
+decode_modes="attention_rescoring"
+
+. tools/parse_options.sh || exit 1;
+
+if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
+  # Please specify decoding_chunk_size for unified streaming and
+  # non-streaming model. The default value is -1, which is full chunk
+  # for non-streaming inference.
+  decoding_chunk_size=
+  ctc_weight=0.5
+  reverse_weight=0.0
+  for mode in ${decode_modes}; do
+  {
+    test_dir=$dir/test_${mode}
+    mkdir -p $test_dir
+    python wenet/bin/recognize.py --gpu 0 \
+      --mode $mode \
+      --config $dir/train.yaml \
+      --data_type $data_type \
+      --test_data data/test/data.list \
+      --checkpoint $decode_checkpoint \
+      --beam_size 10 \
+      --batch_size 1 \
+      --penalty 0.0 \
+      --dict $dict \
+      --ctc_weight $ctc_weight \
+      --reverse_weight $reverse_weight \
+      --result_file $test_dir/text \
+      ${decoding_chunk_size:+--decoding_chunk_size $decoding_chunk_size}
+    
+    # The code of computing script below has been executed in the wenet/bin/train.py 
+    #python tools/compute-wer.py --char=1 --v=1 \
+    #  data/test/text $test_dir/text > $test_dir/wer
+  } &
+  done
+  wait
+fi
--- a/examples/aishell/s0/tools
+++ b/examples/aishell/s0/tools
+../../../tools/
\ No newline at end of file
--- a/examples/aishell/s0/train.sh
+++ b/examples/aishell/s0/train.sh
+#!/usr/bin/env bash
+CUR_DIR="$( cd "$(dirname "$0")" ; pwd )"
+cd $CUR_DIR
+bash run_train.sh --log_dir pid.txt 2>&1 | tee conformer_train_dcu_`date +%Y%m%d%H%M%S`.log
+
--- a/examples/aishell/s0/validate.sh
+++ b/examples/aishell/s0/validate.sh
+#!/usr/bin/env bash
+# Run run_validate.sh from the directory containing this script.
+CUR_DIR="$( cd "$(dirname "$0")" ; pwd )"
+# Quote the path so directories containing spaces work, and stop if the cd
+# fails rather than launching run_validate.sh from the wrong directory.
+cd "$CUR_DIR" || exit 1
+bash run_validate.sh
--- a/examples/aishell/s0/wenet
+++ b/examples/aishell/s0/wenet
+../../../wenet/
\ No newline at end of file
--- a/examples/aishell2/rnnt/README.md
+++ b/examples/aishell2/rnnt/README.md
--- a/examples/aishell2/rnnt/conf/conformer_rnnt.yaml
+++ b/examples/aishell2/rnnt/conf/conformer_rnnt.yaml
--- a/examples/aishell2/rnnt/conf/conformer_u2pp_rnnt.yaml
+++ b/examples/aishell2/rnnt/conf/conformer_u2pp_rnnt.yaml
--- a/examples/aishell2/rnnt/local
+++ b/examples/aishell2/rnnt/local
+../s0/local
\ No newline at end of file