# Checkpoint output directory and input/batch sizing.
parser.add_argument(
    "--save_dir",
    type=str,
    default="./checkpoint",
    help="The output directory where the model checkpoints will be written.",
)
parser.add_argument(
    "--max_seq_length",
    type=int,
    default=128,
    help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
)
parser.add_argument(
    "--batch_size",
    type=int,
    default=32,
    help="Batch size per GPU/CPU for training.",
)
parser.add_argument(
    "--output_emb_size",
    type=int,
    default=0,
    help="Output_embedding_size, 0 means use hidden_size as output embedding size.",
)

# Optimization hyperparameters.
parser.add_argument(
    "--learning_rate",
    type=float,
    default=1e-5,
    help="The initial learning rate for Adam.",
)
parser.add_argument(
    "--weight_decay",
    type=float,
    default=0.0,
    help="Weight decay if we apply some.",
)
parser.add_argument(
    "--epochs",
    type=int,
    default=1,
    help="Total number of training epochs to perform.",
)
parser.add_argument(
    "--warmup_proportion",
    type=float,
    default=0.0,
    help="Linear warmup proportion over the training process.",
)

# Initialization, seeding, device selection and checkpoint cadence.
parser.add_argument(
    "--init_from_ckpt",
    type=str,
    default=None,
    help="The path of checkpoint to be loaded.",
)
parser.add_argument(
    "--seed",
    type=int,
    default=1000,
    help="Random seed for initialization.",
)
parser.add_argument(
    "--device",
    choices=["cpu", "gpu"],
    default="gpu",
    help="Select which device to train model, defaults to gpu.",
)
parser.add_argument(
    "--save_steps",
    type=int,
    default=10000,
    help="Step interval for saving checkpoint.",
)
# Optional cap on the number of training steps; the default of -1 leaves
# the cap disabled (the help text states it only applies when > 0).
parser.add_argument(
    "--max_steps",
    type=int,
    default=-1,
    help="If > 0: set total number of training steps to perform. Override epochs.",
)
# Evaluation cadence and the (mandatory) training data path.
parser.add_argument(
    "--eval_steps",
    type=int,
    default=10000,
    help="Step interval for evaluation.",
)
parser.add_argument(
    "--train_set_file",
    type=str,
    required=True,
    help="The full path of train_set_file.",
)

# Loss-related knobs.
parser.add_argument(
    "--margin",
    type=float,
    default=0.0,
    help="Margin between pos_sample and neg_samples.",
)
parser.add_argument(
    "--scale",
    type=int,
    default=20,
    help="Scale for pair-wise margin_rank_loss.",
)

# Training-mode switch and regularization settings.
parser.add_argument(
    "--is_unsupervised",
    action="store_true",
    help="Whether to use unsupervised training",
)
parser.add_argument(
    "--dropout",
    type=float,
    default=0.1,
    help="Dropout for pretrained model encoder.",
)
parser.add_argument(
    "--dup_rate",
    type=float,
    default=0.32,
    help="duplicate rate for word repetition.",
)

# Model selection and inference-time pooling behaviour.
parser.add_argument(
    "--infer_with_fc_pooler",
    action="store_true",
    help="Whether use fc layer after cls embedding or not for when infer.",
)
parser.add_argument(
    "--model_name_or_path",
    default="rocketqa-zh-base-query-encoder",
    help="The pretrained model used for training",
)
parser.add_argument(
    "--rdrop_coef",
    type=float,
    default=0.0,
    help="The coefficient of KL-Divergence loss in R-Drop paper, for more detail please refer to https://arxiv.org/abs/2106.14448), if rdrop_coef > 0 then R-Drop works",
)
default='inference.get_pooled_embedding.pdmodel',help="The name of file to load the inference program. If it is None, the default filename __model__ will be used.")
default='inference.get_pooled_embedding.pdiparams',help="The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.")
# Input sizing and execution device.
parser.add_argument(
    "--max_seq_length",
    type=int,
    default=64,
    help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
)
parser.add_argument(
    "--batch_size",
    type=int,
    default=32,
    help="Batch size per GPU/CPU for training.",
)
parser.add_argument(
    "--device",
    choices=["cpu", "gpu", "xpu"],
    default="gpu",
    help="Select which device to train model, defaults to gpu.",
)
def _parse_bool_flag(text):
    """Convert a command-line token into a bool without evaluating it as code.

    This replaces ``type=eval``, which executed whatever expression was typed
    on the command line and surfaced a raw traceback for tokens such as
    ``true`` (an undefined name).

    Args:
        text: The raw string supplied for the flag.

    Returns:
        True for "true"/"1" and False for "false"/"0", ignoring letter case
        and surrounding whitespace.

    Raises:
        ValueError: If the token is not one of the accepted spellings.
            argparse turns a ValueError raised by a ``type`` callable into
            a regular usage error.
    """
    normalized = text.strip().lower()
    if normalized in ("true", "1"):
        return True
    if normalized in ("false", "0"):
        return False
    raise ValueError("expected True or False, got %r" % (text,))


# `choices` is kept so the usage/help output still lists {True,False}.
parser.add_argument(
    "--use_tensorrt",
    type=_parse_bool_flag,
    default=False,
    choices=[True, False],
    help="Enable to use tensorrt to speed up.",
)
# Mandatory input files.
parser.add_argument(
    "--corpus_file",
    type=str,
    required=True,
    help="The full path of input file",
)
parser.add_argument(
    "--similar_text_pair_file",
    type=str,
    required=True,
    help="The full path of similar text pair file",
)

# Where the recall results are written.
parser.add_argument(
    "--recall_result_dir",
    type=str,
    default="recall_result",
    help="The full path of recall result file to save",
)
parser.add_argument(
    "--recall_result_file",
    type=str,
    default="recall_result_file",
    help="The file name of recall result",
)
# Mandatory path to the saved model parameters, followed by input sizing.
parser.add_argument(
    "--params_path",
    type=str,
    required=True,
    help="The path to model parameters to be loaded.",
)
parser.add_argument(
    "--max_seq_length",
    type=int,
    default=64,
    help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
)
parser.add_argument(
    "--batch_size",
    type=int,
    default=32,
    help="Batch size per GPU/CPU for training.",
)